def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None,
                           timesteps=None, activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.'''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:  # `if b:` is ambiguous truth-testing for a tensor
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
def get_constants(self, x):
    constants = []
    if 0 < self.dropout_U < 1:
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.concatenate([ones] * self.output_dim, 1)
        B_U = K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
        constants.append(B_U)
    else:
        constants.append(K.cast_to_floatx(1.))
    if self.consume_less == 'cpu' and 0 < self.dropout_W < 1:
        input_shape = self.input_spec[0].shape
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.concatenate([ones] * input_dim, 1)
        B_W = K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
        constants.append(B_W)
    else:
        constants.append(K.cast_to_floatx(1.))
    return constants
def dot_product_attention(self, x, seq_len=None, dropout=0.1, training=None):
    q, k, v = x
    logits = tf.matmul(q, k, transpose_b=True)
    if self.bias:
        logits += self.b
    if seq_len is not None:
        logits = self.mask_logits(logits, seq_len)
    weights = tf.nn.softmax(logits, name="attention_weights")
    weights = K.in_train_phase(K.dropout(weights, dropout), weights,
                               training=training)
    x = tf.matmul(weights, v)
    return x
def get_constants(self, x):
    constants = []
    if 0 < self.dropout_U < 1:
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.concatenate([ones] * self.output_dim, 1)
        B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones)
               for _ in range(3)]
        constants.append(B_U)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    if 0 < self.dropout_W < 1:
        input_shape = self.input_spec[0].shape
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.concatenate([ones] * input_dim, 1)
        B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
               for _ in range(3)]
        constants.append(B_W)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
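
# A minimal sketch of the shared-mask trick the `get_constants` variants
# above rely on: K.dropout samples a mask once per batch, and the mask is
# handed to the RNN step as a constant so the same units are dropped at
# every timestep (one independent mask per gate). Assumes the old
# multi-backend Keras; shapes and the 0.3 rate are illustrative.
import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(2, 5, 4))             # (batch, timesteps, input_dim)
ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))  # (batch, 1)
ones = K.concatenate([ones] * 4, 1)                 # (batch, input_dim)
masks = [K.dropout(ones, 0.3) for _ in range(3)]    # one mask per gate
print([K.eval(m).shape for m in masks])             # [(2, 4), (2, 4), (2, 4)]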
def call(self, inputs, **kwargs):
    main_input, embedding_matrix = inputs
    input_shape_tensor = K.shape(main_input)
    last_input_dim = K.int_shape(main_input)[-1]
    emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
    projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                      self.projection)
    if self.add_biases:
        projected = K.bias_add(projected, self.biases,
                               data_format='channels_last')
    if 0 < self.projection_dropout < 1:
        projected = K.in_train_phase(
            lambda: K.dropout(projected, self.projection_dropout),
            projected,
            training=kwargs.get('training'))
    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
        attention = attention / sqrt_d
    result = K.reshape(
        self.activation(attention),
        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
    return result
def call(self, inputs, **kwargs):
    feature_num = self.feature_num
    [v, q] = inputs
    v_proj = K.tanh(K.dot(v, self.v_proj))
    q_proj = K.tanh(K.dot(q, self.q_proj))
    q_proj = K.expand_dims(q_proj, 1)
    q_proj = tf.tile(q_proj, [1, feature_num, 1])
    joint_repr = v_proj * q_proj
    joint_repr = K.dropout(joint_repr, self.drop_rate)
    logit = K.dot(joint_repr, self.linear)
    logit = K.reshape(logit, shape=[-1, feature_num])
    logit = K.softmax(K.l2_normalize(logit))  # [batch, K]
    logit = K.expand_dims(logit, -1)
    self.result = K.sum(logit * v, 1)  # v: [batch, K, 4096]
    return self.result
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same
            dropout mask for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
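
# A minimal usage sketch for the helper above, assuming the old
# multi-backend Keras with a TensorFlow backend; all shapes are
# illustrative. With training=True the shared dropout mask is applied.
import numpy as np
from keras import backend as K

x = K.variable(np.random.rand(4, 10, 8))  # (batch, timesteps, input_dim)
w = K.variable(np.random.rand(8, 16))     # (input_dim, output_dim)
b = K.variable(np.zeros(16))              # (output_dim,)

y = _time_distributed_dense(x, w, b, dropout=0.2,
                            input_dim=8, output_dim=16, timesteps=10,
                            training=True)
print(K.int_shape(y))  # (None, None, 16) under TensorFlow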
def call(self, inputs, train=True, cache=None):
    assert isinstance(inputs, (list, tuple)) and len(inputs) == 3
    x = inputs[0]
    y = inputs[1]
    bias = inputs[2]
    q = self.q_dense_layer(x)
    k = self.k_dense_layer(y)
    v = self.v_dense_layer(y)

    if cache is not None:
        # Combine cached keys and values with new keys and values.
        k = K.concatenate([cache["k"], k], axis=1)
        v = K.concatenate([cache["v"], v], axis=1)
        # Update cache
        cache["k"] = k  # [batch_size, seq_len, hidden_size]
        cache["v"] = v

    # Split q, k, v into heads.
    q = self.split_heads(q)
    k = self.split_heads(k)
    v = self.split_heads(v)

    # Scale q to prevent the dot product between q and k from growing
    # too large.
    depth = (self.hidden_size // self.num_heads)
    q *= depth ** -0.5

    # Calculate dot product attention
    logits = K2.matmul(q, K.permute_dimensions(k, (0, 1, 3, 2)))
    logits += bias
    weights = K.softmax(logits, axis=-1)
    if train:
        weights = K.dropout(weights, self.attention_dropout)
    attention_output = K2.matmul(weights, v)

    # Recombine heads --> [batch_size, length, hidden_size]
    attention_output = self.combine_heads(attention_output)

    # Run the combined outputs through another linear projection layer.
    attention_output = self.output_dense_layer(attention_output)
    return attention_output
def train(num_classes=100, epochs=100, reps=1):
    (x_train, y_train) = load_data(num_classes, reps)

    model = Sequential()
    model.add(Lambda(lambda x: K.dropout(x, level=0.9),
                     input_shape=input_shape))  # permanent dropout
    model.add(Conv2D(32, kernel_size=(3, 3), kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3), kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, kernel_initializer='uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dense(num_classes, activation='softmax'))
    # model.load_weights("saved_bn.hdf5")
    model.summary()
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.SGD(lr=0.05, decay=1e-6,
                                                 momentum=0.9, nesterov=True),
                  metrics=['accuracy'])

    # add callbacks
    tensorboard = TensorBoard(log_dir="logs/{}".format(time()),
                              histogram_freq=10,
                              write_graph=True, write_images=True)
    checkpoint = ModelCheckpoint("saved_bn.hdf5", monitor='val_acc', verbose=1,
                                 save_best_only=True, save_weights_only=False,
                                 mode='auto', period=1)
    stopper = EarlyStopping(monitor='val_acc', min_delta=0.01, patience=10,
                            verbose=1, mode='auto')

    model.fit(x_train, y_train,
              batch_size=100,
              epochs=epochs,
              verbose=1,
              shuffle=True,
              validation_data=(x_train, y_train),  # note: validates on the training set
              callbacks=[tensorboard, checkpoint, stopper])
    return model
def step_do(self, step_in, states):  # define the computation for each step
    in_value = step_in
    if 0 < self.dropout < 1.:
        self._dropout_mask = K.in_train_phase(
            K.dropout(K.ones_like(step_in), self.dropout),
            K.ones_like(step_in))
    if 0 < self.dropout < 1.:
        in_value = step_in * self._dropout_mask
    '''
    g1 = states[0][:, :-18]
    g2 = states[1][:, :-18]
    g3 = in_value[:, :-18]
    d1 = K.sigmoid(K.sqrt(K.sum((K.square(g3-g1)),axis=-1,keepdims=True)))
    d2 = K.sigmoid(K.sqrt(K.sum((K.square(g3-g2)),axis=-1,keepdims=True)))
    d1 = K.sigmoid(K.sum((K.abs(in_value-states[0])/self.units),axis=-1,keepdims=True))
    d2 = K.sigmoid(K.sum((K.abs(in_value-states[1])/self.units),axis=-1,keepdims=True))
    '''
    d1 = K.sigmoid(states[0] * in_value) / 2
    d2 = K.sigmoid(states[1] * in_value) / 2
    # update = 1  # K.sigmoid(K.dot(states[0], self.state_kernel) + K.dot(step_in, self.input_kernel))
    # print('d1.shape', d1.shape)
    state1 = d1 * states[0] + (1 - d1) * in_value
    print('state1.shape', state1.shape)
    state2 = (1 - d2) * states[1] + d2 * in_value
    # outputs = (1-update)*states[0]+update*step_in
    '''
    lt = K.expand_dims(state1,axis=-2)
    st = K.expand_dims(state2,axis=-2)
    outputs = K.concatenate([lt, st], axis=-2)
    out1 = K.dot(state1, self.encode_kernel)
    out2 = K.dot(state2, self.encode_kernel)
    '''
    outputs = K.concatenate([state1, state2], axis=-1)
    # outputs = K.relu(outputs)
    return outputs, [state1, state2]
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same
            dropout mask for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # The implementation below is arguably clearer than the older Keras
    # version: K.dot maps over the time dimension directly instead of
    # collapsing it. It behaves the same under TensorFlow, but is
    # untested on other backends.
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    return x
def _call_attention(self, Key, Value, Query):
    r"""Self-attention: pass the similarity matrix (computed by the
    similarity function) through a softmax, then dot-multiply it with
    the source itself.

    .. math:: A = Softmax(Similarity(Source, Query))

    .. math:: C = A \cdot Source
    """
    if isinstance(self.similarity, Callable):
        sim = self.similarity(Key, Query)
    else:
        sim = getattr(self, self.similarity)(Key, Query)
    sm = activations.softmax(sim)
    if self.dropout_rate:
        sm = K.dropout(sm, self.dropout_rate)
    if isinstance(self.mergfunc, Callable):
        result = self.mergfunc(sm, Value)
    elif isinstance(self.mergfunc, str):
        # fall back to the bound method, not the string 'batch_dot_merg',
        # which would not be callable
        result = getattr(self, self.mergfunc, self.batch_dot_merg)(sm, Value)
    else:
        result = getattr(self, 'batch_dot_merg')(sm, Value)
    return result
def deconv2d(layer_input, filters, f_size=8, dropout_rate=0, permanent=False):
    """Layers used during upsampling"""
    u = UpSampling2D(size=2)(layer_input)
    u = Conv2D(filters, kernel_size=f_size, strides=1,
               padding='same', activation='relu')(u)
    if dropout_rate and not permanent:
        u = Dropout(dropout_rate)(u)
    elif dropout_rate and permanent:
        # permanent dropout from my main man fchollet <3
        u = Lambda(lambda x: K.dropout(x, level=dropout_rate))(u)
    u = BatchNormalization(momentum=0.8)(u)
    return u
def call(self, x, mask=None):
    if mask is None:
        mask = T.mean(T.ones_like(x), axis=-1)
    mask = T.cast(mask, T.floatx())

    dr_perc = 0.5
    mask1 = T.dropout(mask, level=dr_perc)
    mask1 = T.clip(mask1, 0, 1)

    mod_smax = T.max(x[:, :, 0] * mask1, axis=1).dimshuffle(0, 'x')
    smax = T.max(x[:, :, 0] * mask, axis=1).dimshuffle(0, 'x')  # (nb_samples, np_features)
    smin = T.min(x[:, :, 0] * mask, axis=1).dimshuffle(0, 'x')  # (nb_samples, np_features)
    # mod_smax = T.expand_dims(T.max(x[:,:,0]*mask1, axis=1), 1)
    # smax = T.expand_dims(T.max(x[:,:,0]*mask, axis=1), 1)  # (nb_samples, np_features)
    # smin = T.expand_dims(T.min(x[:,:,0]*mask, axis=1), 1)  # (nb_samples, np_features)

    x_rounded = x[:, :, 0] * mask
    sum_unmasked = T.batch_dot(x_rounded, mask, axes=1)  # (nb_samples, np_features)
    ssum = T.sum(x, axis=-2)  # (nb_samples, np_features)
    rcnt = T.sum(mask, axis=-1, keepdims=True)  # (nb_samples); number of unmasked samples in each record
    bag_label = sum_unmasked / rcnt
    smean = ssum / rcnt

    # # sigmoid weighted mean:
    # weight_fn = T.reshape(T.transpose(T.tile(T.reshape(T.variable(get_weight_fn(100)), (100, 1)), T.shape(x)[0])), (T.shape(x)[0], T.shape(x)[1], 1))
    # weighted_x = weight_fn * x
    # wsum = T.sum(weighted_x, axis=-2)  # (nb_samples, np_features)
    # # weight_sum = T.reshape(T.batch_dot(T.ones_like(x), weight_fn, axes=1), T.shape(rcnt))  # used T.ones_like(x) instead of x to check if I am seeing the outputs.. which helped me debug
    # wmean = wsum  # because the weights are normalized
    # sofmax = (1/largeNum)*T.log(T.sum(T.exp()))
    # return bag_label
    return smax  # max voting
def call(self, inputs, mask=None):
    assert len(inputs) == 2
    query = K.bias_add(K.dot(inputs[0], self.W_query), self.bias_query)
    if self.query_act is not None:
        query = self.query_act(query)
    key = K.bias_add(K.dot(inputs[1], self.W_key), self.bias_key)
    if self.key_act is not None:
        key = self.key_act(key)
    value = K.bias_add(K.dot(inputs[1], self.W_value), self.bias_value)
    if self.value_act is not None:
        value = self.value_act(value)

    query = K.reshape(query, shape=(-1, K.int_shape(inputs[0])[1],
                                    self.num_attention_heads,
                                    self.size_per_head))
    query = K.permute_dimensions(query, pattern=(0, 2, 1, 3))
    key = K.reshape(key, shape=(-1, K.int_shape(inputs[1])[1],
                                self.num_attention_heads,
                                self.size_per_head))
    key = K.permute_dimensions(key, pattern=(0, 2, 1, 3))
    value = K.reshape(value, shape=(-1, K.int_shape(inputs[1])[1],
                                    self.num_attention_heads,
                                    self.size_per_head))
    value = K.permute_dimensions(value, pattern=(0, 2, 1, 3))

    attention_scores = K.batch_dot(query, key, axes=(3, 3))
    attention_scores /= np.sqrt(self.size_per_head)

    if mask is not None and mask != [None, None]:
        mask_q, mask_k = mask
        mask_q = K.cast(mask_q, K.floatx())
        mask_k = K.cast(mask_k, K.floatx())
        mask_q = K.expand_dims(mask_q)
        mask_k = K.expand_dims(mask_k)
        attention_mask = K.batch_dot(mask_q, mask_k, axes=(-1, -1))
        attention_mask = K.expand_dims(attention_mask, axis=1)
        adder = (1 - attention_mask) * (-10000.0)
        attention_scores += adder

    attention_probs = K.softmax(attention_scores, axis=-1)
    attention_probs = K.dropout(attention_probs,
                                self.attention_probs_dropout_prob)
    context = K.batch_dot(attention_probs, value, axes=(3, 2))
    context = K.permute_dimensions(context, pattern=(0, 2, 1, 3))
    context = K.reshape(context,
                        shape=(-1, K.int_shape(inputs[0])[1],
                               self.num_attention_heads * self.size_per_head))
    return context
def time_distributed_dense(input_tensor, weight, bias=None, timesteps=None,
                           input_dim=None, output_dim=None, dropout=None,
                           training=None):
    """Apply t.weight + bias for every t of timesteps of input

    input_tensor: input tensor, shape = (batch num, timestep, input_dim)
    weight: weight tensor = (input_dim, output_dim)
    bias: optional bias
    dropout: dropout value
    training: training phase boolean
    """
    if timesteps is None:
        timesteps = K.shape(input_tensor)[1]
    if input_dim is None:
        input_dim = K.shape(input_tensor)[2]
    if output_dim is None:
        output_dim = K.shape(weight)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply dropout at every timestep
        ones = K.ones_like(K.reshape(input_tensor[:, 0, :], (-1, input_dim)))
        dropout_tensor = K.dropout(ones, dropout)
        dropout_tensor_with_timestep = K.repeat(dropout_tensor, timesteps)
        input_tensor = K.in_train_phase(
            input_tensor * dropout_tensor_with_timestep,
            input_tensor, training=training)

    # collapse timestep and batch num together
    input_tensor = K.reshape(input_tensor, (-1, input_dim))
    input_tensor = K.dot(input_tensor, weight)
    if bias is not None:
        input_tensor = K.bias_add(input_tensor, bias)
    output_tensor = K.reshape(input_tensor, (-1, timesteps, output_dim))
    return output_tensor
def build_model(loss="mse", num_outputs=1): model = Sequential() model.add( ConvLSTM2D(filters=100, kernel_size=(3, 3), input_shape=(None, nx, ny, 1), padding='same', return_sequences=False)) model.add(BatchNormalization()) model.add(Lambda(lambda x: K.dropout(x, level=0.75))) model.add( Dense( units=num_outputs, kernel_regularizer=regularizers.l2(0.0001), )) model.add(Activation("linear")) model.compile(loss=loss, optimizer='nadam') return model
def sample_h_given_x(self, x):
    """
    Draw sample from p(h|x).

    For Bernoulli RBM the conditional probability distribution can be
    derived to be
        p(h_j=1|x) = sigmoid(x^T W[:,j] + bh_j).
    """
    # pre-sigmoid (used in cross-entropy error calculation for better
    # numerical stability)
    h_pre = K.dot(x, self.W) + self.bh
    # h_sigm = K.sigmoid(h_pre)
    # mean of Bernoulli distribution ('p', prob. of variable taking value 1),
    # sometimes called mean-field value
    h_sigm = self.activation(self.scaling_h_given_x * h_pre)

    # drop out noise
    if 0.0 < self.p < 1.0:
        noise_shape = self._get_noise_shape(h_sigm)
        h_sigm = K.in_train_phase(K.dropout(h_sigm, self.p, noise_shape),
                                  h_sigm)

    # random sample:
    #   \hat{h} = 1, if p(h=1|x) > uniform(0, 1)
    #             0, otherwise
    h_samp = random_binomial(shape=h_sigm.shape, n=1, p=h_sigm)
    return h_samp, h_pre, h_sigm
def time_distributed_dense(self, x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None):
    '''Apply y.w + b for every temporal slice y of x.'''
    self.x = x
    self.w = w
    self.b = b
    self.dropout = dropout  # was misspelled `self.droput`
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.timesteps = timesteps

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x *= expanded_dropout_matrix

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:  # `if b:` is ambiguous truth-testing for a tensor
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(x, (-1, timesteps, output_dim))
    return x
def call(self, x, mask=None):
    q = K.dot(x, self.W_q)
    k = K.dot(x, self.W_k)
    v = K.dot(x, self.W_v)

    k_t = K.permute_dimensions(k, (0, 2, 1))
    q *= self.depth ** (-0.5)  # scaled dot-product
    logit = K.batch_dot(q, k_t)  # [batch_size, q_length, k_length]

    # normalize by taking the softmax
    attention_weight = K.softmax(logit)

    # dropout
    attention_weight = K.dropout(attention_weight, level=self.dropout_rate)

    # pull information from `v` according to the attention weights
    # [batch_size, q_length, depth]
    attention_output = K.batch_dot(attention_weight, v)
    output = K.dot(attention_output, self.W_o)
    return output
def get_constants(self, x):
    # needs further editing
    constants = []
    # if 0 < self.dropout_U < 1:
    #     ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
    #     ones = K.concatenate([ones] * self.output_dim, 1)
    #     B_U = [K.in_train_phase(K.dropout(ones, self.dropout_U), ones) for _ in range(3)]
    #     constants.append(B_U)
    # else:
    #     constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    if 0 < self.dropout_W < 1:
        input_shape = self.input_spec[0].shape
        input_dim = input_shape[-1]
        ones = K.ones_like(K.reshape(x[:, 0, 0], (-1, 1)))
        ones = K.concatenate([ones] * input_dim, 1)
        B_W = [K.in_train_phase(K.dropout(ones, self.dropout_W), ones)
               for _ in range(3)]
        constants.append(B_W)
    else:
        constants.append([K.cast_to_floatx(1.) for _ in range(3)])
    return constants
def step_do(self, step_in, states):  # define the computation for each step
    in_value = step_in
    if 0 < self.dropout < 1.:
        self._dropout_mask = K.in_train_phase(
            K.dropout(K.ones_like(step_in), self.dropout),
            K.ones_like(step_in))
    if 0 < self.dropout < 1.:
        in_value = step_in * self._dropout_mask

    d1 = K.sigmoid(
        K.sqrt(
            K.sum(K.square(in_value - states[0]) / self.units,
                  axis=-1, keepdims=True) / 2))
    d2 = K.sigmoid(
        K.sqrt(
            K.sum(K.square(in_value - states[1]) / self.units,
                  axis=-1, keepdims=True) / 2))
    '''
    d1 = K.sigmoid(K.sum((K.abs(in_value-states[0])/self.units),axis=-1,keepdims=True))
    d2 = K.sigmoid(K.sum((K.abs(in_value-states[1])/self.units),axis=-1,keepdims=True))
    '''
    print('d1.shape', d1.shape)
    state1 = d1 * states[0] + (1 - d1) * in_value
    print('state1.shape', state1.shape)
    state2 = (1 - d2) * states[0] + d2 * in_value
    '''
    lt = K.expand_dims(state1,axis=-2)
    st = K.expand_dims(state2,axis=-2)
    outputs = K.concatenate([lt, st], axis=-2)
    '''
    outputs = K.concatenate([state1, state2], axis=-1)
    return outputs, [state1, state2]
def call(self, inputs, **kwargs):
    main_input, embedding_matrix = inputs
    input_shape_tensor = K.shape(main_input)
    last_input_dim = input_shape_tensor[-1]
    print('input_shape_tensor: ', input_shape_tensor)
    embedding_matrix_shape = K.shape(embedding_matrix)  # (vocab_size, hidden_size)
    emb_input_dim, emb_output_dim = (embedding_matrix_shape[-2],
                                     embedding_matrix_shape[-1])
    # shape: (main_input_shape[0], hidden_size)
    projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                      self.projection)
    if self.add_biases:
        projected = K.bias_add(projected, self.biases,
                               data_format='channels_last')
    if 0 < self.projection_dropout < 1:
        projected = K.in_train_phase(
            lambda: K.dropout(projected, self.projection_dropout),
            projected,
            training=kwargs.get('training'))
    # shape: (main_input_shape[0], vocab_size); computed against every
    # word in the vocabulary
    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # scaled dot-product attention, described in
        # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
        # sqrt_d = math.sqrt(emb_output_dim)
        sqrt_d = K.sqrt(K.cast(emb_output_dim, dtype=K.floatx()))
        attention = attention / sqrt_d
    result = K.reshape(
        self.activation(attention),
        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
    return result
def dropped_inputs():
    return K.dropout(ones, self.recurrent_dropout)
def dropped_inputs():
    return K.dropout(ones, self.dropout)
def dropped_inputs():
    return K.dropout(inputs, self.rate, noise_shape, seed=self.seed)
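
# These one-line closures exist so K.in_train_phase can take a callable:
# the dropped branch is built lazily and evaluated only in training. A
# minimal sketch of the surrounding pattern; make_dropout_mask is a
# hypothetical wrapper, not part of the original code.
from keras import backend as K

def make_dropout_mask(x, rate, training=None):
    ones = K.ones_like(x)

    def dropped_inputs():
        return K.dropout(ones, rate)

    # Stochastic mask during training, all-ones at inference.
    return K.in_train_phase(dropped_inputs, ones, training=training)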
def _dropout(x, level, noise_shape=None, seed=None):
    x = K.dropout(x, level, noise_shape, seed)
    x *= (1. - level)  # compensate for the scaling by the dropout
    return x
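
# K.dropout implements inverted dropout: survivors are scaled by
# 1 / (1 - level), so multiplying by (1 - level) restores their original
# magnitude. A quick check, assuming a TensorFlow-backed Keras:
import numpy as np
from keras import backend as K

y = _dropout(K.ones((1000,)), level=0.5)
vals = np.round(K.eval(y), 6)
print(set(vals.tolist()))  # {0.0, 1.0} -- survivors are 1.0, not 2.0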
def get_output(self, train=False):
    X = self.get_input(train)
    if self.p > 0.:
        X = K.dropout(X, level=self.p)
    return X
def call(self, x, mask=None):
    if 0. < self.rate < 1.:
        noise_shape = self._get_noise_shape(x)
        x = K.dropout(x, self.rate, noise_shape)
    return x
# Train set
float_data = shuffel(float_data)
# Normalization of the training data
float_data[:, :-1] = dp.zero_mean_normalization(float_data[:, :-1])
data = float_data[:, :-1]
train_data = data[150:]
label_data = float_data[150:, -1]
# data[:150]=K.get_value(K.dropout(data[:150],0.2,None,None))*0.8
# data[:150,],float_data[:150,-1]=verarbeit_meanimp(data[:150,],float_data[:150,-1])
# validation data

# MCAR (missing completely at random) masks via K.dropout
train_data_voll = K.dropout(train_data, 0.0001, None, None)
train_data_voll = K.get_value(train_data_voll)
train_data_20_MCAR = K.dropout(train_data, 0.2, None, None)
train_data_20_MCAR = K.get_value(train_data_20_MCAR)
train_data_20_MCAR *= 0.8  # undo the 1/(1-0.2) inverted-dropout scaling
train_data_40_MCAR = K.dropout(train_data, 0.4, None, None)
train_data_40_MCAR = K.get_value(train_data_40_MCAR)
train_data_40_MCAR *= 0.6  # undo the 1/(1-0.4) scaling

# MAR (missing at random)
sort_float_data = sorted(float_data[150:], key=lambda x: x[1])
sort_float_data = np.array(sort_float_data)
sort_train_data = sort_float_data[:, :-1]
label_data_MAR = sort_float_data[:, -1]
def VGG19(include_top=True,
          weights='imagenet',
          input_tensor=None,
          input_shape=None,
          pooling=None,
          classes=1000,
          isDrop=False,
          drop_rate=0.3,
          **kwargs):
    """Instantiates the VGG19 architecture.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with '
                         '`include_top` as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Block 1
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                      name='block1_conv1')(img_input)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same',
                      name='block1_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)

    # Block 2
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                      name='block2_conv1')(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same',
                      name='block2_conv2')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)

    # Block 3
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                      name='block3_conv1')(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                      name='block3_conv2')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                      name='block3_conv3')(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same',
                      name='block3_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block4_conv1')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block4_conv2')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block4_conv3')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block4_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block5_conv1')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block5_conv2')(x)
    if isDrop:
        x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block5_conv3')(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same',
                      name='block5_conv4')(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        x = layers.Dense(4096, activation='relu', name='fc1')(x)
        if isDrop:
            x = Lambda(lambda x: K.dropout(x, level=drop_rate))(x)
        x = layers.Dense(4096, activation='relu', name='fc2')(x)
        x = layers.Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = models.Model(inputs, x, name='vgg19')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='cbe5617147190e668d6c5d5026f83318')
        else:
            weights_path = keras_utils.get_file(
                'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                file_hash='253f8cb515780f3b799900260a226db6')
        model.load_weights(weights_path)
        if backend.backend() == 'theano':
            keras_utils.convert_all_kernels_in_model(model)
    elif weights is not None:
        model.load_weights(weights)

    return model
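
# A hypothetical construction call for the variant above: isDrop=True
# bakes K.dropout Lambdas into the graph, so dropout also fires at
# inference (Monte Carlo dropout). weights=None skips the ImageNet
# download; the class count is an illustrative assumption.
model = VGG19(include_top=True, weights=None,
              input_shape=(224, 224, 3),
              classes=10, isDrop=True, drop_rate=0.3)
model.summary()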
def output(self, train=False):
    X = self._default_input(train)
    if self.p > 0.:
        if train:
            X = K.dropout(X, level=self.p)
    return X
def PermaDropout(rate):
    return Lambda(lambda x: K.dropout(x, level=rate))
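
# A usage sketch for PermaDropout: because K.dropout is not gated by the
# learning phase, the layer stays stochastic at predict() time, which
# enables Monte Carlo dropout. Layer sizes and the 30-sample count are
# illustrative assumptions.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(64, activation='relu', input_shape=(20,)),
    PermaDropout(0.5),  # active in training and at inference
    Dense(1),
])
model.compile(loss='mse', optimizer='adam')

x = np.random.rand(8, 20)
samples = np.stack([model.predict(x) for _ in range(30)])
# The spread across repeated predictions estimates model uncertainty.
print(samples.mean(axis=0).shape, samples.std(axis=0).shape)  # (8, 1) (8, 1)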
# (tail of a `load_mnist` helper; its beginning is not shown)
    X_test /= 255
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')
    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test

X_train, Y_train, X_test, Y_test = load_mnist()

model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Lambda(lambda x: K.dropout(x, level=0.2)))
model.add(Dense(512))
model.add(Activation('relu'))
# model.add(Dropout(0.2))
model.add(Lambda(lambda x: K.dropout(x, level=0.2)))
model.add(Dense(10))
model.add(Activation('softmax'))

rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms)

model.fit(X_train, Y_train,
          batch_size=batch_size, nb_epoch=nb_epoch,
          show_accuracy=True, verbose=2,
          validation_data=(X_test, Y_test))
def call(self, x, mask=None):
    if 0. < self.p < 1.:
        x = K.dropout(x, level=self.p)
    return x
def func(args):
    old, new = args
    pred = K.random_uniform([]) < self.dropout
    ret = K.switch(pred, old, old + K.dropout(new, self.dropout))
    return K.in_train_phase(ret, old + new)
def call(self, x, mask=None):
    if 0. < self.p < 1.:
        x = K.in_train_phase(K.dropout(x, level=self.p), x)
    return x