def residual_block(x):
    """Build one gated residual block on top of tensor ``x``.

    ``nb_filters``, ``use_bias`` and the loop indices ``i`` (dilation
    exponent) and ``s`` (stack index) are read from the enclosing scope.

    Returns the element-wise sum of the block input and the 1x1-convolved
    gated activation.
    """
    # TODO: initalization, regularization?
    # Note: The AtrousConvolution1D with the 'causal' flag is implemented in
    # github.com/basveeling/keras#@wavenet.
    block_input = x
    dilation = 2 ** i
    dilated_conv_args = dict(atrous_rate=dilation, border_mode='valid',
                             causal=True, bias=use_bias)

    tanh_branch = CausalAtrousConvolution1D(
        nb_filters, 2,
        name='dilated_conv_%d_tanh_s%d' % (dilation, s),
        activation='tanh',
        **dilated_conv_args)(block_input)

    # Only the sigmoid gate sees the dropped-out input.
    dropped_input = layers.Dropout(0.2)(block_input)
    sigm_branch = CausalAtrousConvolution1D(
        nb_filters, 2,
        name='dilated_conv_%d_sigm_s%d' % (dilation, s),
        activation='sigmoid',
        **dilated_conv_args)(dropped_input)

    gate_name = 'gated_activation_%d_s%d' % (i, s)
    gated = layers.Merge(mode='mul', name=gate_name)([tanh_branch, sigm_branch])

    projected = layers.Convolution1D(nb_filters, 1, border_mode='same',
                                     bias=use_bias)(gated)
    return layers.Merge(mode='sum')([block_input, projected])
def residual_block(x):
    """Build one gated residual block with causal dilated convolutions.

    ``nb_filters``, ``use_bias``, ``res_l2`` and the loop indices ``i``
    (dilation exponent) and ``s`` (stack index) are read from the enclosing
    scope.

    Returns:
        (res_x, skip_x): the residual output (block input plus a 1x1
        convolution of the gated activation) and the skip-connection output
        (a separate 1x1 convolution of the same gated activation).
    """
    original_x = x
    dilation = 2 ** i
    # TODO: initalization, regularization?
    # Causality comes from the stock Conv1D ``padding='causal'`` option.
    # The Keras 2 argument names (use_bias / kernel_regularizer) and merge
    # layers (Multiply / Add) are used so the block does not depend on the
    # legacy ``Merge`` layer.
    tanh_out = Conv1D(
        nb_filters, 2,
        dilation_rate=dilation,
        padding='causal',
        use_bias=use_bias,
        name='dilated_conv_%d_tanh_s%d' % (dilation, s),
        activation='tanh',
        kernel_regularizer=l2(res_l2),
    )(x)
    sigm_out = Conv1D(
        nb_filters, 2,
        dilation_rate=dilation,
        padding='causal',
        use_bias=use_bias,
        name='dilated_conv_%d_sigm_s%d' % (dilation, s),
        activation='sigmoid',
        kernel_regularizer=l2(res_l2),
    )(x)
    x = layers.Multiply(name='gated_activation_%d_s%d' % (i, s))(
        [tanh_out, sigm_out])

    res_x = layers.Conv1D(nb_filters, 1, padding='same', use_bias=use_bias,
                          kernel_regularizer=l2(res_l2))(x)
    skip_x = layers.Conv1D(nb_filters, 1, padding='same', use_bias=use_bias,
                           kernel_regularizer=l2(res_l2))(x)
    res_x = layers.Add()([original_x, res_x])
    return res_x, skip_x
def call(self, inputs, **kwargs):
    """Attend over the left context of each symbol and merge with encodings.

    Args:
        inputs: a two-element list ``[symbols, encodings]``.

    Returns:
        ``[output, p]`` where ``p`` is the result of
        ``distributed_dot_softmax`` and ``output`` depends on ``self.merge``
        ("concatenate", "sum", "attention" or "sigmoid").

    Raises:
        ValueError: if ``self.merge`` is not one of the supported modes.
    """
    assert isinstance(inputs, list) and len(inputs) == 2
    symbols, encodings = inputs[0], inputs[1]
    # dropout masks
    self._generate_dropout_mask(encodings)
    # contexts.shape = (M, T, left, input_dim)
    contexts = make_history(symbols, self.left, symbols[:, :1])
    # M.shape = C.shape = (M, T, left, output_dim)
    M = kb.dot(contexts, self.M)  # input embeddings
    C = kb.dot(contexts, self.C)  # output embeddings
    if self.use_bias:
        M += self.T
    if self.embeddings_dropout > 0.0:
        M = M * self._dropout_mask[0]
        C = C * self._dropout_mask[1]
    p = distributed_dot_softmax(M, encodings)
    compressed_context = distributed_transposed_dot(C, p)
    if self.merge == "concatenate":
        output = kl.Concatenate()([compressed_context, encodings])
    elif self.merge == "sum":
        # kl.Add is the Keras 2 counterpart of the legacy Merge(mode='sum'),
        # consistent with the kl.Concatenate used above.
        output = kl.Add()([compressed_context, encodings])
    elif self.merge == "attention":
        output = compressed_context
    elif self.merge == "sigmoid":
        output = distributed_cell([compressed_context, encodings])
    else:
        # Previously this fell through and failed with an unbound ``output``.
        raise ValueError("Unsupported merge mode: {!r}".format(self.merge))
    return [output, p]
def buildModel(self):
    """Build, compile and return the two-sequence classification model.

    Both integer sequences share one embedding layer, are reshaped to
    ``(maxLen, embeddingSize, 1)``, concatenated along axis 2 and passed
    through ``ResNetX``, dropout and a 2-unit softmax layer named "label".
    The compiled model is also stored on ``self.model``.
    """
    # word net
    input1 = layers.Input(shape=(self.maxLen,), name="seq1")
    input2 = layers.Input(shape=(self.maxLen,), name="seq2")

    # One embedding instance so both sequences share the same weights.
    comEmbedding = layers.Embedding(input_dim=self.Size_Vocab,
                                    output_dim=self.embeddingSize,
                                    input_length=self.maxLen)
    emb1 = comEmbedding(input1)
    emb2 = comEmbedding(input2)

    reshapeLayer = layers.Reshape(
        target_shape=(self.maxLen, self.embeddingSize, 1))
    x1 = reshapeLayer(emb1)
    x2 = reshapeLayer(emb2)

    # Concatenate is the Keras 2 counterpart of the legacy
    # Merge(mode="concat", concat_axis=2), matching the Keras 2 keyword
    # names used by the rest of this method.
    x = layers.Concatenate(axis=2)([x1, x2])
    x = ResNetX(x)

    dropLayer = layers.Dropout(0.36)(x)
    predictionLayer = layers.Dense(units=2, name="label",
                                   activation="softmax")(dropLayer)

    self.model = models.Model(inputs=[input1, input2],
                              outputs=[predictionLayer])
    # NOTE(review): a 2-unit softmax is paired with binary_crossentropy;
    # confirm this is intended rather than categorical_crossentropy.
    self.model.compile(optimizer=optimizers.Adam(),
                       loss={"label": losses.binary_crossentropy})
    return self.model
def build_model():
    """Assemble the two-branch captioning model.

    As described in https://arxiv.org/abs/1511.02283
    Input: The 4101-dim feature from extract_features, and the previous
    output word.

    A visual branch (time-distributed dense embedding) and a word branch
    (embedding with dropout) are concatenated along axis 2 and fed to an
    LSTM, dropout and a softmax layer over the vocabulary. The returned
    model is not compiled.
    """
    # Visual branch: per-timestep dense projection of the image features.
    visual_branch = models.Sequential()
    visual_embed = layers.Dense(WORDVEC_DIM, activation='relu',
                                name='visual_embed')
    visual_branch.add(layers.TimeDistributed(
        visual_embed, input_shape=(None, IMAGE_FEATURE_SIZE)))

    # Word branch: embedding of the previous output word.
    word_branch = models.Sequential()
    word_embed = layers.Embedding(VOCABULARY_SIZE, WORDVEC_DIM, dropout=.5)
    word_branch.add(word_embed)

    # Joint model on top of both branches.
    combined = models.Sequential()
    combined.add(layers.Merge([visual_branch, word_branch],
                              mode='concat', concat_axis=2))
    combined.add(layers.LSTM(1024, name='lstm_1', return_sequences=False))
    combined.add(layers.Dropout(.5))
    combined.add(layers.Dense(VOCABULARY_SIZE, activation='softmax',
                              name='embed_out'))
    return combined
def build_model(fragment_length, nb_filters, nb_output_bins, dilation_depth,
                nb_stacks, use_skip_connections, learn_all_outputs, _log,
                desired_sample_rate, use_bias, res_l2, final_l2):
    """Build the WaveNet-style model (not compiled).

    Args:
        fragment_length: number of time steps of the input.
        nb_filters: filters of every convolution inside the residual stack.
        nb_output_bins: input feature size and output size.
        dilation_depth: largest dilation exponent; each stack uses
            dilations ``2**0 .. 2**dilation_depth``.
        nb_stacks: number of repetitions of the dilation stack.
        use_skip_connections: if true, the summed skip outputs replace the
            residual output before the final layers.
        learn_all_outputs: must be true; the other mode is deprecated.
        _log: logger used to report the receptive field.
        desired_sample_rate: not used in this function.
        use_bias: whether the residual-block convolutions use a bias.
        res_l2: L2 factor for the residual-block convolutions.
        final_l2: L2 factor for the first final 1x1 convolution.

    Returns:
        A Keras ``Model`` mapping the input to a softmax over
        ``nb_output_bins``.

    Raises:
        DeprecationWarning: if ``learn_all_outputs`` is false.
    """

    def residual_block(x, i, s):
        """Gated residual block for dilation exponent ``i`` in stack ``s``."""
        original_x = x
        dilation = 2 ** i
        # TODO: initalization, regularization?
        # Causality comes from the stock Conv1D ``padding='causal'`` option.
        tanh_out = Conv1D(
            nb_filters, 2,
            dilation_rate=dilation,
            padding='causal',
            use_bias=use_bias,
            name='dilated_conv_%d_tanh_s%d' % (dilation, s),
            activation='tanh',
            kernel_regularizer=l2(res_l2),
        )(x)
        sigm_out = Conv1D(
            nb_filters, 2,
            dilation_rate=dilation,
            padding='causal',
            use_bias=use_bias,
            name='dilated_conv_%d_sigm_s%d' % (dilation, s),
            activation='sigmoid',
            kernel_regularizer=l2(res_l2),
        )(x)
        x = layers.Multiply(name='gated_activation_%d_s%d' % (i, s))(
            [tanh_out, sigm_out])

        res_x = layers.Conv1D(nb_filters, 1, padding='same',
                              use_bias=use_bias,
                              kernel_regularizer=l2(res_l2))(x)
        skip_x = layers.Conv1D(nb_filters, 1, padding='same',
                               use_bias=use_bias,
                               kernel_regularizer=l2(res_l2))(x)
        res_x = layers.Add()([original_x, res_x])
        return res_x, skip_x

    # Named ``model_input`` so the builtin ``input`` is not shadowed.
    model_input = Input(shape=(fragment_length, nb_output_bins),
                        name='input_part')
    out = model_input
    skip_connections = []
    out = Conv1D(nb_filters, 2, dilation_rate=1, padding='causal',
                 name='initial_causal_conv')(out)
    for s in range(nb_stacks):
        for i in range(0, dilation_depth + 1):
            out, skip_out = residual_block(out, i, s)
            skip_connections.append(skip_out)

    if use_skip_connections:
        out = layers.Add()(skip_connections)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(nb_output_bins, 1, padding='same',
                        kernel_regularizer=l2(final_l2))(out)
    out = layers.Activation('relu')(out)
    out = layers.Conv1D(nb_output_bins, 1, padding='same')(out)

    if not learn_all_outputs:
        # The former "take the last output" Lambda that followed this raise
        # was unreachable and has been removed.
        raise DeprecationWarning('Learning on just all outputs is wasteful, now learning only inside receptive field.')

    out = layers.Activation('softmax', name="output_softmax")(out)
    model = Model(model_input, out)

    receptive_field, receptive_field_ms = compute_receptive_field()
    _log.info('Receptive Field: %d (%dms)' % (receptive_field, int(receptive_field_ms)))
    return model
def test_merge_mul(self):
    """Compare ZLayer and KLayer ``Merge(mode="mul")`` on two (3, 5) inputs."""
    feature_shape = (3, 5)
    z_inputs = [ZLayer.InputLayer(input_shape=feature_shape)
                for _ in range(2)]
    zlayer = ZLayer.Merge(layers=z_inputs, mode="mul")

    k_inputs = [KLayer.InputLayer(input_shape=feature_shape)
                for _ in range(2)]
    klayer = KLayer.Merge(layers=k_inputs, mode="mul")

    # Two random batches of size 2, one per merged input.
    batches = [np.random.random([2, 3, 5]) for _ in range(2)]
    self.compare_layer(klayer, zlayer, batches)
def test_merge_concat(self):
    """Compare ZLayer and KLayer ``Merge(mode="concat")`` on inputs that
    differ only in their last dimension (11 vs 8)."""
    first_shape = (2, 5, 11)
    second_shape = (2, 5, 8)

    z_inputs = [ZLayer.InputLayer(input_shape=first_shape),
                ZLayer.InputLayer(input_shape=second_shape)]
    zlayer = ZLayer.Merge(layers=z_inputs, mode="concat")

    k_inputs = [KLayer.InputLayer(input_shape=first_shape),
                KLayer.InputLayer(input_shape=second_shape)]
    klayer = KLayer.Merge(layers=k_inputs, mode="concat")

    # Random batches of size 3 matching each input shape.
    batch_first = np.random.random([3, 2, 5, 11])
    batch_second = np.random.random([3, 2, 5, 8])
    self.compare_layer(klayer, zlayer, [batch_first, batch_second])
def test_merge_mul(self):
    """Compare BLayer and KLayer ``Merge(mode="mul")`` on two (3, 5) inputs."""
    feature_shape = (3, 5)
    b_inputs = [BLayer.InputLayer(input_shape=feature_shape)
                for _ in range(2)]
    blayer = BLayer.Merge(layers=b_inputs, mode="mul")

    k_inputs = [KLayer.InputLayer(input_shape=feature_shape)
                for _ in range(2)]
    klayer = KLayer.Merge(layers=k_inputs, mode="mul")

    # Two random batches of size 2, one per merged input.
    batches = [np.random.random([2, 3, 5]) for _ in range(2)]
    self.compare_newapi(klayer, blayer, batches)
def model_ContextSum(p, embeddings, max_sent_len, n_out):
    """Build and compile the context-sum relation classification model.

    Args:
        p: dict of hyper-parameters; the keys read here are 'dropout1',
            'position_emb', 'rnn1_layers', 'rnn1', 'units1', 'gpu' and
            'optimizer'.
        embeddings: pre-trained word embedding matrix; its shape gives the
            vocabulary size and embedding dimension.
        max_sent_len: length of the input sentences.
        n_out: number of output classes per edge.

    Returns:
        A compiled Keras model taking ``[sentence_input, entity_markers]``
        and producing a softmax over ``n_out`` classes for each of the
        ``MAX_EDGES_PER_GRAPH`` edges.
    """
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len,), dtype='int32', name='sentence_input')
    # Repeat the input MAX_EDGES_PER_GRAPH times: once for the target entity
    # pair and once for each of the remaining (ghost) pairs.
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=embeddings.shape[1],
                         input_dim=embeddings.shape[0],
                         input_length=max_sent_len,
                         weights=[embeddings],
                         mask_zero=True,
                         trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(MAX_EDGES_PER_GRAPH, max_sent_len,), dtype='int8', name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=p['position_emb'],
                         input_dim=4,
                         input_length=max_sent_len,
                         mask_zero=True,
                         W_regularizer=regularizers.l2(),
                         trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified amount of RNN layers
    x = layers.merge([word_embeddings, pos_embeddings], mode="concat")
    for _ in range(p["rnn1_layers"] - 1):
        x = layers.wrappers.TimeDistributed(
            getattr(layers, p['rnn1'])(p['units1'], return_sequences=True,
                                       consume_less='gpu' if p['gpu'] else "cpu"))(x)
    sentence_matrix = layers.wrappers.TimeDistributed(
        getattr(layers, p['rnn1'])(p['units1'], return_sequences=False,
                                   consume_less='gpu' if p['gpu'] else "cpu"))(x)

    # Take the vector of the sentences with the target entity pair
    context_shape = (MAX_EDGES_PER_GRAPH - 1, p['units1'])
    layers_to_concat = []
    for i in range(MAX_EDGES_PER_GRAPH):
        # ``i=i`` binds the current index into each lambda. Without it the
        # Lambda layers share the loop variable and would all see its final
        # value whenever the function is evaluated after the loop (e.g. once
        # the model has been serialised and reloaded).
        sentence_vector = layers.Lambda(
            lambda l, i=i: l[:, i],
            output_shape=(p['units1'],))(sentence_matrix)
        if i == 0:
            context_vectors = layers.Lambda(
                lambda l, i=i: l[:, i + 1:],
                output_shape=context_shape)(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(
                lambda l, i=i: l[:, :i],
                output_shape=context_shape)(sentence_matrix)
        else:
            context_vectors = layers.Lambda(
                lambda l, i=i: K.concatenate([l[:, :i], l[:, i + 1:]], axis=1),
                output_shape=context_shape)(sentence_matrix)
        context_vector = GlobalSumPooling1D()(context_vectors)
        edge_vector = layers.merge([sentence_vector, context_vector], mode="concat")
        edge_vector = layers.Reshape((1, p['units1'] * 2))(edge_vector)
        layers_to_concat.append(edge_vector)

    edge_vectors = layers.Merge(mode='concat', concat_axis=1)(layers_to_concat)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(
        layers.Dense(n_out, activation="softmax", name='main_output'))(edge_vectors)

    model = models.Model(input=[sentence_input, entity_markers], output=[main_output])
    model.compile(optimizer=p['optimizer'], loss='categorical_crossentropy', metrics=['accuracy'])
    return model
def test_merge_max(self):
    """Compare BLayer and KLayer ``Merge(mode="max")`` on two (2, 5, 8) inputs."""
    feature_shape = (2, 5, 8)
    b_inputs = [BLayer.InputLayer(input_shape=feature_shape)
                for _ in range(2)]
    blayer = BLayer.Merge(layers=b_inputs, mode="max")

    k_inputs = [KLayer.InputLayer(input_shape=feature_shape)
                for _ in range(2)]
    klayer = KLayer.Merge(layers=k_inputs, mode="max")

    # Two random batches of size 3, one per merged input.
    batches = [np.random.random([3, 2, 5, 8]) for _ in range(2)]
    self.compare_newapi(klayer, blayer, batches)
def wavenet_block_light(x, nb_filters, subsample=2, use_bias=False,
                        res_l2=0., dropout_rate=0., batchnorm=False,
                        bn_momentum=0.99, **kwargs):
    """Subsampling residual conv block inspired by the wavenet architecture.

    x : history of shape (batch_size, hist_length, nb_inputs, nb_features),
        arranged in reverse time order, i.e. the latest obs is x[:,0,:,:]
    nb_filters : nb. of output features
    subsample : subsampling rate along the time dimension
    use_bias : whether to use bias in conv layers
    res_l2 : l2 coef
    dropout_rate : spatial dropout rate
    batchnorm : use batchnorm if True
    bn_momentum : momentum coef for BatchNormalization

    Returns (res_out, skip_out): the residual sum of the subsampled input and
    the 1x1 convolution output, and the first time step of that 1x1
    convolution output.
    """
    # TODO: Add padding in case time dimension not divisible by sub_sample
    block_input = x
    hidden = block_input
    if batchnorm:
        hidden = kl.BatchNormalization(momentum=bn_momentum)(hidden)

    # Strided (2 x 1) convolution over the time axis.
    strided_conv = kl.Convolution2D(
        nb_filters, nb_row=2, nb_col=1,
        subsample=(subsample, 1),
        border_mode='valid',
        bias=use_bias,
        activation='relu',
        W_regularizer=l2(res_l2))
    hidden = strided_conv(hidden)
    hidden = SpatialDropout(dropout_rate, collapse_dim=(1, ))(hidden)

    pointwise_conv = kl.Convolution2D(
        nb_filters, nb_row=1, nb_col=1,
        border_mode='same',
        bias=use_bias,
        W_regularizer=l2(res_l2))
    res_x = pointwise_conv(hidden)

    # Subsample the block input the same way so both operands line up.
    shortcut = kl.Lambda(lambda t: t[:, 0::subsample, :, :])(block_input)
    res_out = kl.Merge(mode='sum')([shortcut, res_x])
    skip_out = kl.Lambda(lambda t: t[:, :1, :, :])(res_x)
    return res_out, skip_out
def call(self, inputs, **kwargs):
    """Attend over the left context of each symbol and merge with encodings.

    Args:
        inputs: a two-element list ``[symbols, encodings]``.

    Returns:
        ``[output, p]`` where ``p`` is the result of
        ``distributed_dot_softmax`` and ``output`` depends on ``self.merge``
        ("concatenate", "sum", "attention" or "sigmoid").

    Raises:
        ValueError: if ``self.merge`` is not one of the supported modes.
    """
    assert isinstance(inputs, list) and len(inputs) == 2
    symbols, encodings = inputs[0], inputs[1]
    # contexts.shape = (M, T, left)
    contexts = make_history(symbols, self.left, symbols[:, :1])
    # M.shape = C.shape = (M, T, left, output_dim)
    M = kb.gather(self.M, contexts)  # input embeddings
    C = kb.gather(self.C, contexts)  # output embeddings
    if self.use_bias:
        M += self.T
    # p.shape = (M, T, input_dim)
    p = distributed_dot_softmax(M, encodings)
    compressed_context = distributed_transposed_dot(C, p)
    if self.merge == "concatenate":
        output = kl.Concatenate()([compressed_context, encodings])
    elif self.merge == "sum":
        # kl.Add is the Keras 2 counterpart of the legacy Merge(mode='sum'),
        # consistent with the kl.Concatenate used above.
        output = kl.Add()([compressed_context, encodings])
    elif self.merge == "attention":
        output = compressed_context
    elif self.merge == "sigmoid":
        output = distributed_cell([compressed_context, encodings])
    else:
        # Previously this fell through and failed with an unbound ``output``.
        raise ValueError("Unsupported merge mode: {!r}".format(self.merge))
    return [output, p]
def arch(raw_input):
    """Build the multi-scale network on top of ``raw_input``.

    One ``wavenet_light`` sub-network is applied per
    (hist_length, time_unit, initial_subsample) triple taken from the
    enclosing scope; their outputs are merged with mode ``merge_scales``
    when there is more than one. An optional intermediate 1x1 convolution
    follows, then either ``mixture_experts`` or a final 1x1 convolution
    producing ``output_horizon`` features. All hyper-parameters are read
    from the enclosing scope.

    Returns the output tensor after squeezing axis 1 and permuting the two
    remaining non-batch axes.
    """
    # Reverse time dimension make it easier not to lose the lastest obs
    input_ = kl.Lambda(lambda x: x[:, ::-1, :])(raw_input)
    # Add a dimension for filters features -> shape (bs, time, inputs, features)
    input_ = kl.Lambda(lambda x: K.expand_dims(x))(input_)

    scale_outputs = []
    for hist_length, time_unit, initial_subsample in zip(
            hist_lengths, time_units, initial_subsamples):
        # ``n=hist_length`` binds the current length into the lambda. Without
        # it every Lambda layer shares the loop variable and would slice with
        # its final value whenever evaluated after the loop (e.g. once the
        # model has been serialised and reloaded).
        scale_input = kl.Lambda(
            lambda x, n=hist_length: x[:, :n, :, :])(input_)
        nb_blocks = int(np.log2(hist_length // initial_subsample))
        scale_out = wavenet_light(
            hist_length, nb_inputs, output_horizon,
            nb_filters=nb_filters,
            nb_blocks=nb_blocks,
            initial_pooling=time_unit,
            initial_subsample=initial_subsample,
            use_skip_connections=use_skip_connections,
            use_bias=False,
            res_l2=res_l2,
            final_l2=final_l2,
            batchnorm=batchnorm,
            bn_momentum=bn_momentum,
            dropout_rate=dropout_rate,
            input_noise=input_noise,
            has_top=False)(scale_input)
        scale_outputs.append(scale_out)

    if len(scale_outputs) > 1:
        out = kl.Merge(mode=merge_scales)(scale_outputs)
    else:
        out = scale_outputs[0]

    if intermediate_conv:
        if batchnorm:
            out = kl.BatchNormalization(momentum=bn_momentum)(out)
        out = kl.Convolution2D(
            nb_filters, nb_row=1, nb_col=1,
            border_mode='same',
            bias=False,
        )(out)
        out = kl.Dropout(dropout_rate)(out)

    if n_experts > 1:
        out = mixture_experts(out, output_horizon, final_l2=final_l2,
                              n_experts=n_experts)
    else:
        out = kl.Convolution2D(output_horizon, nb_row=1, nb_col=1,
                               border_mode='same',
                               W_regularizer=l2(final_l2))(out)

    # Remove time dimension
    out = kl.Lambda(lambda x: K.squeeze(x, 1))(out)
    # Switch horizons into time dimension
    out = kl.Permute(dims=(2, 1))(out)
    return out
def MultiLevelDCNet(input_shape, n_class, routings):
    """
    A Multi-level DCNet on CIFAR-10.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
    """
    x = layers.Input(shape=input_shape)
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    def _capsule_level(tensor, kernel_size):
        """One level: dense block (8 layers, 32 filters, growth rate 32),
        batch normalisation, then primary capsules.

        Returns what PrimaryCap returns: the capsule output passed on to
        dynamic routing and the Conv2D output it was derived from.
        """
        dense_out, _ = densenet.DenseBlock(tensor, growth_rate=32,
                                           nb_layers=8, nb_filter=32)
        normalized = BatchNormalization(axis=concat_axis,
                                        epsilon=1.1e-5)(dense_out)
        return PrimaryCap(normalized, dim_capsule=8, n_channels=12,
                          kernel_size=kernel_size, strides=2,
                          padding='valid')

    ########################### Level 1 Capsules ###########################
    primarycaps_1, caps_conv_1 = _capsule_level(x, 5)
    # ReLU on the Conv2D output before it feeds the next level.
    level_2_input = layers.Activation('relu')(caps_conv_1)

    ########################### Level 2 Capsules ###########################
    primarycaps_2, caps_conv_2 = _capsule_level(level_2_input, 5)
    level_3_input = layers.Activation('relu')(caps_conv_2)

    ########################### Level 3 Capsules ###########################
    primarycaps_3, _ = _capsule_level(level_3_input, 3)

    # Merging Primary Capsules for the Merged DigitCaps (CapsuleLayer formed
    # by combining all levels of primary capsules)
    mergedLayer = layers.merge([primarycaps_1, primarycaps_2, primarycaps_3],
                               mode='concat', concat_axis=1)

    ########################### Separate DigitCaps Outputs (used for training) ###########################
    # Merged DigitCaps
    digitcaps_0 = CapsuleLayer(num_capsule=n_class, dim_capsule=16,
                               routings=routings,
                               name='digitcaps0')(mergedLayer)
    out_caps_0 = Length(name='capsnet_0')(digitcaps_0)

    # First Level DigitCaps
    digitcaps_1 = CapsuleLayer(num_capsule=n_class, dim_capsule=16,
                               routings=routings,
                               name='digitcaps1')(primarycaps_1)
    out_caps_1 = Length(name='capsnet_1')(digitcaps_1)

    # Second Level DigitCaps
    digitcaps_2 = CapsuleLayer(num_capsule=n_class, dim_capsule=12,
                               routings=routings,
                               name='digitcaps2')(primarycaps_2)
    out_caps_2 = Length(name='capsnet_2')(digitcaps_2)

    # Third Level DigitCaps
    digitcaps_3 = CapsuleLayer(num_capsule=n_class, dim_capsule=10,
                               routings=routings,
                               name='digitcaps3')(primarycaps_3)
    out_caps_3 = Length(name='capsnet_3')(digitcaps_3)

    ########################### Combined DigitCaps Output (used for evaluation) ###########################
    digitcaps = layers.merge(
        [digitcaps_1, digitcaps_2, digitcaps_3, digitcaps_0],
        mode='concat', concat_axis=2, name='digitcaps')
    out_caps = Length(name='capsnet')(digitcaps)

    # Reconstruction (decoder) network
    y = layers.Input(shape=(n_class,))
    # The true label is used to mask the output of capsule layer. For training
    masked_by_y = Mask()([digitcaps, y])
    # Mask using the capsule with maximal length. For prediction
    masked = Mask()(digitcaps)

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder_input_dim = int(digitcaps.shape[2]*n_class)
    decoder.add(layers.Dense(600, activation='relu',
                             input_dim=decoder_input_dim, name='zero_layer'))
    decoder.add(layers.Dense(600, activation='relu', name='one_layer'))

    decoderFinal = models.Sequential(name='decoderFinal')
    # Concatenating two layers
    layers_to_join = [decoder.get_layer('zero_layer'),
                      decoder.get_layer('one_layer')]
    decoderFinal.add(layers.Merge(layers_to_join, mode='concat'))
    decoderFinal.add(layers.Dense(1200, activation='relu'))
    decoderFinal.add(layers.Dense(np.prod([32, 32, 1]), activation='sigmoid'))
    decoderFinal.add(layers.Reshape(target_shape=[32, 32, 1], name='out_recon'))

    # Model for training
    train_outputs = [out_caps_0, out_caps_1, out_caps_2, out_caps_3,
                     decoderFinal(masked_by_y)]
    train_model = models.Model([x, y], train_outputs)

    # Model for evaluation (prediction)
    # Note that out_caps is the final prediction. Other predictions could be
    # used for analysing separate-level predictions.
    eval_outputs = [out_caps, out_caps_0, out_caps_1, out_caps_2, out_caps_3,
                    decoderFinal(masked)]
    eval_model = models.Model(x, eval_outputs)
    return train_model, eval_model
weights=[embedding_matrix], input_length=MAX_LEN, trainable=False) inp = Input(shape=(MAX_LEN, )) embedding_sequence = embedding_layer(inp) convs = [] filter_sizes = [3, 4, 5] for filter_size in filter_sizes: l_conv = layers.Conv1D(filters=128, kernel_size=filter_size, activation='relu')(embedding_sequence) l_pool = layers.MaxPool1D(pool_size=3)(l_conv) convs.append(l_pool) l_merged = layers.Merge(mode='concat', concat_axis=1)(convs) conv = layers.Conv1D(filters=128, kernel_size=3, activation='relu')(embedding_sequence) pool = layers.MaxPooling1D(pool_size=3)(conv) if extra_conv: x = Dropout(0.2)(l_merged) else: x = Dropout(0.2)(pool) x = layers.Flatten()(x) x = Dense(512, activation='relu')(x) x = Dense(512, activation='relu')(x) x = Dense(512, activation='relu')(x) x = Dense(1)(x) cnn_model1 = Model(inp, x) cnn_model1.compile(optimizer='rmsprop', loss='mse', metrics=[escore])
def _encode_rows(df):
    """Encode a data frame into the three model input arrays.

    Returns (desc_codes, amounts, dates): the "Concepto" text as
    right-aligned character codes in a (rows, MAXLEN) uint8 array, plus the
    raw "Importe" and "FechaRel" columns as float32 vectors.
    """
    desc_codes = np.zeros((len(df), MAXLEN), dtype=np.uint8)
    amounts = np.zeros((len(df), ), dtype=np.float32)
    dates = np.zeros((len(df), ), dtype=np.float32)
    for i, row in df.iterrows():
        desc = row["Concepto"]
        # Right-align the description; leading positions stay zero.
        desc_codes[i, MAXLEN - len(desc):] = [ord(c) for c in desc]
        amounts[i] = row["Importe"]
        dates[i] = row["FechaRel"]
    return desc_codes, amounts, dates


def main():
    """Train the three-branch classifier, save artefacts and evaluate it.

    Creates a timestamped output directory, trains on ``args.input``, writes
    the metrics CSV, the model file, the loss/accuracy plot, the confusion
    matrix and a copy of this script into that directory, and prints the
    accuracy on ``args.validationfile``.
    """
    args = setup()
    momentnow = time.strftime("%Y%m%d_%H%M%S")
    os.mkdir(momentnow)

    dfTrain = loadData(args.input, args.rows_to_skip)
    X1, X2, X3 = _encode_rows(dfTrain)

    # One-hot labels; categories are numbered in order of first appearance.
    Y = np.zeros((len(dfTrain), dfTrain["Classification"].unique().size), dtype=np.int8)
    categories = {}
    for i, row in dfTrain.iterrows():
        Y[i, categories.setdefault(row["Classification"], len(categories))] = 1
    inv_categories = {v: k for k, v in categories.items()}

    amount_mean = np.mean(X2)
    amount_std = np.std(X2)
    X2 = (X2 - amount_mean) / amount_std

    # preparing my prediction set
    dfPred = loadData(args.validationfile, args.rows_to_skip)
    X1pred, X2pred, X3pred = _encode_rows(dfPred)
    # Standardise with the TRAINING statistics: the model was fitted on
    # amounts scaled this way, so using the prediction set's own mean/std
    # would feed it differently scaled inputs.
    X2pred = (X2pred - amount_mean) / amount_std

    # creating my RNN model
    model_desc = models.Sequential()
    # Fixed identity embedding, i.e. a one-hot encoding of each code.
    embedding = np.zeros((256, 256), dtype=np.float32)
    np.fill_diagonal(embedding, 1)
    model_desc.add(
        layers.embeddings.Embedding(256, 256, input_length=MAXLEN,
                                    weights=[embedding], trainable=False))
    model_desc.add(layers.LSTM(128))

    model_amount = models.Sequential()
    model_amount.add(layers.Dense(10, input_shape=(1, ), activation="relu"))

    model_date = models.Sequential()
    model_date.add(layers.Dense(10, input_shape=(1, ), activation="relu"))

    merged = layers.Merge((model_desc, model_amount, model_date), mode="concat")
    final_model = models.Sequential()
    final_model.add(merged)
    final_model.add(layers.Dense(64, activation="relu"))
    final_model.add(layers.Dropout(args.dropout))
    final_model.add(layers.Dense(Y.shape[-1], activation="softmax"))
    final_model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

    csv_logger = keras.callbacks.CSVLogger(momentnow + "/metrics_" + momentnow + ".csv")
    modelfit = final_model.fit([X1, X2, X3], Y,
                               batch_size=50,
                               epochs=args.epochs,
                               validation_split=args.validation,
                               shuffle=True,
                               callbacks=[csv_logger])
    print(final_model.summary())
    print(modelfit.history.keys())
    final_model.save(momentnow + "/model_" + momentnow + ".h5")

    # plot ACCURACY for training and validation sets
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(modelfit.history['acc'])
    plt.plot(modelfit.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    # plot LOSS for training and validation sets
    plt.subplot(1, 2, 2)
    plt.plot(modelfit.history['loss'])
    plt.plot(modelfit.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(momentnow + "/plotloss_" + momentnow)
    plt.show()

    YPred = final_model.predict_classes([X1pred, X2pred, X3pred], verbose=2)
    YTrue = [categories[x] for x in dfPred["Classification"]]
    print(YPred)
    for predicted in YPred:
        print(inv_categories[predicted])

    hit = sum(1 for predicted, actual in zip(YPred, YTrue)
              if predicted == actual)
    acc_rate = hit / len(YPred)
    print("accuracy on prediction set: {:.6}%".format(acc_rate * 100))

    # Row-normalised confusion matrix (each row sums to ~1).
    confusionmatrix = confusion_matrix(YTrue, YPred)
    cm_norm = confusionmatrix.astype("float") / confusionmatrix.sum(
        axis=1)[:, np.newaxis]
    cm_norm = np.round(cm_norm, 2)
    sns.set(font_scale=0.9)  # for label size
    plt.figure()
    plotconf = sns.heatmap(cm_norm, annot=True, annot_kws={"size": 6}, cbar=False)
    plotconf.figure.savefig(momentnow + "/confusionmatrix_" + momentnow)

    # Keep a copy of the exact script that produced these results.
    destinationfile = momentnow + "/code_" + momentnow + '.py'
    copyfile(__file__, destinationfile)
def MultiLevelDCNet(input_shape, n_class, routings):
    """
    A DCNet (1-level DCNet) on MNIST.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
    """
    x = layers.Input(shape=input_shape)
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    ########################### Primary Capsules ###########################
    # Dense block with 8 layers, 32 filters and growth rate 32; the returned
    # filter count is not needed here.
    dense_out, _ = densenet.DenseBlock(x, growth_rate=32, nb_layers=8,
                                       nb_filter=32)
    normalized = BatchNormalization(axis=concat_axis,
                                    epsilon=1.1e-5)(dense_out)

    # PrimaryCap returns the capsule output that goes to dynamic routing
    # together with the Conv2D output it was derived from (unused here).
    primarycaps, _ = PrimaryCap(normalized, dim_capsule=8, n_channels=32,
                                kernel_size=9, strides=2, padding='valid')

    ########################### DigitCaps Output ###########################
    capsule_layer = CapsuleLayer(num_capsule=n_class, dim_capsule=16,
                                 routings=routings, name='digitcaps0')
    digitcaps = capsule_layer(primarycaps)
    out_caps = Length(name='capsnet')(digitcaps)

    # Reconstruction (decoder) network
    y = layers.Input(shape=(n_class, ))
    # The true label is used to mask the output of capsule layer. For training
    masked_by_y = Mask()([digitcaps, y])
    # Mask using the capsule with maximal length. For prediction
    masked = Mask()(digitcaps)

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder_input_dim = int(digitcaps.shape[2] * n_class)
    decoder.add(layers.Dense(512, activation='relu',
                             input_dim=decoder_input_dim, name='zero_layer'))
    decoder.add(layers.Dense(512, activation='relu', name='one_layer'))

    decoderFinal = models.Sequential(name='decoderFinal')
    # Concatenating two layers
    layers_to_join = [decoder.get_layer('zero_layer'),
                      decoder.get_layer('one_layer')]
    decoderFinal.add(layers.Merge(layers_to_join, mode='concat'))
    decoderFinal.add(layers.Dense(1024, activation='relu'))
    decoderFinal.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoderFinal.add(layers.Reshape(input_shape, name='out_recon'))

    # Model for training
    train_model = models.Model([x, y], [out_caps, decoderFinal(masked_by_y)])
    # Model for evaluation (prediction)
    eval_model = models.Model(x, [out_caps, decoderFinal(masked)])
    return train_model, eval_model
def arch(raw_input):
    """Build the light wavenet stack on top of ``raw_input``.

    Applies optional input noise, an initial (padded) convolution that
    depends on ``initial_pooling``, ``nb_blocks`` calls to
    ``wavenet_block_light``, then either the summed skip outputs or the
    first time step of the residual output, followed by a ReLU, a 1x1
    convolution, dropout and, if ``has_top`` is set, a final 1x1
    convolution with ``output_horizon`` filters. All hyper-parameters are
    read from the enclosing scope.

    Returns the resulting tensor.
    """
    head = raw_input
    if input_noise > 0.:
        head = kl.GaussianNoise(input_noise)(head)

    if initial_pooling > 1:
        head = kl.Lambda(lambda x: K.asymmetric_spatial_2d_padding(
            x, top_pad=output_horizon, bottom_pad=0,
            left_pad=0, right_pad=0))(head)
        if batchnorm:
            head = kl.BatchNormalization(momentum=bn_momentum)(head)
        head = kl.Convolution2D(nb_filters,
                                nb_row=initial_pooling + output_horizon,
                                nb_col=1,
                                bias=use_bias,
                                subsample=(initial_subsample, 1),
                                border_mode='valid')(head)
    else:
        if batchnorm:
            head = kl.BatchNormalization(momentum=bn_momentum)(head)
        head = kl.Lambda(lambda x: K.asymmetric_spatial_2d_padding(
            x, top_pad=1, bottom_pad=0, left_pad=0, right_pad=0))(head)
        head = kl.Convolution2D(nb_filters, nb_row=2, nb_col=1,
                                bias=use_bias,
                                subsample=(1, 1),
                                border_mode='valid')(head)

    perceptive_field = 2**nb_blocks * initial_subsample
    if perceptive_field < hist_length:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which embedded the source
        # indentation in the printed message.
        print('History length of {} but conv block with perceptive field '
              'of {}. This is suboptimal'.format(hist_length,
                                                 perceptive_field))

    skip_connections = []
    for _ in range(nb_blocks):
        head, skip_out = wavenet_block_light(
            head, nb_filters,
            subsample=2,
            use_bias=use_bias,
            res_l2=res_l2,
            batchnorm=batchnorm,
            bn_momentum=bn_momentum,
            dropout_rate=dropout_rate,
        )
        skip_connections.append(skip_out)

    if use_skip_connections:
        head = kl.Merge(mode='sum')(skip_connections)
    else:
        # Keep only the first time step of the residual output.
        head = kl.Lambda(lambda x: x[:, :1, :, :])(head)

    head = kl.Activation('relu')(head)
    if batchnorm:
        head = kl.BatchNormalization(momentum=bn_momentum)(head)
    head = kl.Convolution2D(
        nb_filters, nb_row=1, nb_col=1,
        border_mode='same',
        bias=use_bias,
    )(head)
    head = kl.Dropout(dropout_rate)(head)

    if has_top:
        head = kl.Convolution2D(output_horizon, nb_row=1, nb_col=1,
                                border_mode='same',
                                bias=use_bias,
                                W_regularizer=l2(final_l2))(head)
    return head
def create_model(desired_sample_rate, dilation_depth, nb_stacks):
    """Build the WaveNet-style multi-label classifier (not compiled).

    Args:
        desired_sample_rate: forwarded to ``compute_receptive_field_``.
        dilation_depth: largest dilation exponent; each stack uses
            dilations ``2**0 .. 2**dilation_depth``.
        nb_stacks: number of repetitions of the dilation stack.

    Returns:
        A Keras ``Model`` mapping a ``(fragment_length, 4)`` input to 919
        sigmoid outputs, where ``fragment_length`` is 488 plus the
        receptive field.
    """
    nb_output_bins = 4
    nb_filters = 64
    use_bias = False
    res_l2 = 0
    final_l2 = 0
    fragment_length = 488 + compute_receptive_field_(
        desired_sample_rate, dilation_depth, nb_stacks)[0]
    use_skip_connections = True
    learn_all_outputs = True

    def residual_block(x):
        """Gated residual block; reads loop indices ``i`` and ``s`` from the
        enclosing scope and returns ``(residual_output, skip_output)``."""
        original_x = x
        # TODO: initalization, regularization?
        # Note: The AtrousConvolution1D with the 'causal' flag is implemented
        # in github.com/basveeling/keras#@wavenet.
        tanh_out = CausalAtrousConvolution1D(
            nb_filters, 2,
            atrous_rate=2**i,
            border_mode='valid',
            causal=True,
            bias=use_bias,
            name='dilated_conv_%d_tanh_s%d' % (2**i, s),
            activation='tanh',
            W_regularizer=l2(res_l2))(x)
        # Only the sigmoid gate sees the dropped-out input.
        x = layers.Dropout(0.2)(x)
        sigm_out = CausalAtrousConvolution1D(
            nb_filters, 2,
            atrous_rate=2**i,
            border_mode='valid',
            causal=True,
            bias=use_bias,
            name='dilated_conv_%d_sigm_s%d' % (2**i, s),
            activation='sigmoid',
            W_regularizer=l2(res_l2))(x)
        x = layers.Merge(mode='mul',
                         name='gated_activation_%d_s%d' % (i, s))(
                             [tanh_out, sigm_out])

        res_x = layers.Convolution1D(nb_filters, 1, border_mode='same',
                                     bias=use_bias,
                                     W_regularizer=l2(res_l2))(x)
        skip_x = layers.Convolution1D(nb_filters, 1, border_mode='same',
                                      bias=use_bias,
                                      W_regularizer=l2(res_l2))(x)
        res_x = layers.Merge(mode='sum')([original_x, res_x])
        return res_x, skip_x

    # Named ``model_input`` so the builtin ``input`` is not shadowed.
    model_input = Input(shape=(fragment_length, nb_output_bins),
                        name='input_part')
    out = model_input
    skip_connections = []
    out = CausalAtrousConvolution1D(nb_filters, 2,
                                    atrous_rate=1,
                                    border_mode='valid',
                                    causal=True,
                                    name='initial_causal_conv')(out)
    for s in range(nb_stacks):
        for i in range(0, dilation_depth + 1):
            out, skip_out = residual_block(out)
            skip_connections.append(skip_out)

    if use_skip_connections:
        out = layers.Merge(mode='sum')(skip_connections)
    out = layers.PReLU()(out)
    out = layers.Convolution1D(nb_filter=nb_output_bins, filter_length=3,
                               border_mode='same')(out)
    out = layers.Dropout(0.5)(out)
    out = layers.PReLU()(out)
    out = layers.Convolution1D(nb_filter=nb_output_bins, filter_length=3,
                               border_mode='same')(out)

    if not learn_all_outputs:
        # The former "take the last output" Lambda that followed this raise
        # was unreachable and has been removed.
        raise DeprecationWarning(
            'Learning on just all outputs is wasteful, now learning only inside receptive field.'
        )

    out = layers.PReLU()(out)
    out = layers.Flatten()(out)
    predictions = layers.Dense(919, activation='sigmoid', name='fc1')(out)
    model = Model(model_input, predictions)

    receptive_field, receptive_field_ms = compute_receptive_field_(
        desired_sample_rate, dilation_depth, nb_stacks)

    # NOTE(review): ``_log`` is not a parameter here; it must be provided at
    # module level.
    _log.info('Receptive Field: %d (%dms)' %
              (receptive_field, int(receptive_field_ms)))
    return model