# Assumed common header for the snippets below. They mix Keras 1.x-style
# (`merge`, `W_regularizer`, `Model(input=..., output=...)`) and Keras 2.x-style
# APIs, so this header is a best-effort reconstruction, not the original one.
# Custom objects referenced later (MaskAvg, MySumLayer, MyLayer, SReLU,
# he_normal_scaled, auc, log_loss, binary_crossentropy_with_ranking,
# _residual_block, const_init, tan_layer, atan_layer, DATA_FORMAT, logger)
# are assumed to be defined elsewhere in their respective codebases.
import itertools
import logging

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.initializers import Constant, RandomUniform
from keras.layers import (Activation, Add, Average, AveragePooling2D,
                          BatchNormalization, Concatenate, Conv1D, Conv2D,
                          Convolution1D, Convolution2D, Dense, Dot, Dropout,
                          Embedding, Flatten, Input, Lambda, LSTM, Maximum,
                          MaxPooling1D, MaxPooling2D, Multiply, Reshape,
                          Subtract, UpSampling2D, add, concatenate, merge,
                          multiply)
from keras.models import Model, Sequential
from keras.optimizers import Adadelta, Adagrad, Adam
from keras.regularizers import l2 as l2_reg


def model_20161217_ffm_v1(feature_size):
    # select features
    # fields = feature_size.keys()
    FFM_L2 = 0.00002
    FFM_DIM = 3
    fields = [
        'leak',
        # 'ad_id_fact',
        'weekday',
        'day',
        'hour',
        'geo_1',
        # 'geo_2',
        # 'geo_3',
        # 'geo_location',
        'platform',
        'advertiser_id',
        'campaign_id',
        'document_id',
    ]

    # get model
    print('Create model input')
    model_inputs = {}
    for field in fields:
        model_inputs[field] = Input(shape=(1,), dtype='int32', name='input_' + field)

    print('Create ffm layers')
    ffm_layers = []
    for field1, field2 in itertools.combinations(fields, 2):
        embed1 = Flatten()(Embedding(
            feature_size[field1] + 1,
            FFM_DIM,
            input_length=1,
            name='embed_{}_{}'.format(field1, field2),
            W_regularizer=l2_reg(FFM_L2),
        )(model_inputs[field1]))
        embed2 = Flatten()(Embedding(
            feature_size[field2] + 1,
            FFM_DIM,
            input_length=1,
            name='embed_{}_{}'.format(field2, field1),
            W_regularizer=l2_reg(FFM_L2),
        )(model_inputs[field2]))
        ffm_layers.append(merge(
            [embed1, embed2],
            mode='dot',
            dot_axes=1,
        ))
    output = Activation('sigmoid', name='output')(merge(ffm_layers, mode='sum'))

    print('compile model')
    input_field = list(model_inputs.keys())
    model = Model(input=[model_inputs[field] for field in input_field], output=output)
    optimizer = Adagrad(lr=0.0002, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    print(model.summary())
    return input_field, model
def conv_block(inputs, kernel, output_dims, reg=None):
    """
    reg is either None or a float specifying the weight of l2 regularization
    """
    conv = Conv2D(output_dims, kernel, padding='same', activation='relu',
                  kernel_regularizer=None if reg is None else l2_reg(reg),
                  bias_regularizer=None if reg is None else l2_reg(reg))(inputs)
    conv = Conv2D(output_dims, kernel, padding='same', activation='relu',
                  kernel_regularizer=None if reg is None else l2_reg(reg),
                  bias_regularizer=None if reg is None else l2_reg(reg))(conv)
    return MaxPooling2D()(conv)
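# A minimal usage sketch for conv_block (hypothetical 80x320 RGB input and
# regularization weight, not from the original code). Two stacked 'same'
# convolutions followed by the default MaxPooling2D halve the spatial
# resolution while setting the channel count: (80, 320, 3) -> (40, 160, 16).
demo_in = Input(shape=(80, 320, 3))
demo_out = conv_block(demo_in, 3, 16, reg=0.001)
print(Model(demo_in, demo_out).output_shape)  # expected: (None, 40, 160, 16)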
def build_model(max_features, K=8, solver='adam', l2=0.0, l2_fm=0.0):
    inputs = []
    flatten_layers = []
    columns = range(len(max_features))
    for c in columns:
        inputs_c = Input(shape=(1,), dtype='int32', name='input_%s' % c)
        num_c = max_features[c]
        embed_c = Embedding(
            num_c,
            K,
            input_length=1,
            name='embed_%s' % c,
            W_regularizer=l2_reg(l2_fm),
        )(inputs_c)
        flatten_c = Flatten()(embed_c)
        inputs.append(inputs_c)
        flatten_layers.append(flatten_c)

    fm_layers = []
    for emb1, emb2 in itertools.combinations(flatten_layers, 2):
        dot_layer = merge([emb1, emb2], mode='dot', dot_axes=1)
        fm_layers.append(dot_layer)

    for c in columns:
        num_c = max_features[c]
        embed_c = Embedding(
            num_c,
            1,
            input_length=1,
            name='linear_%s' % c,
            W_regularizer=l2_reg(l2),
        )(inputs[c])
        flatten_c = Flatten()(embed_c)
        fm_layers.append(flatten_c)

    flatten = merge(fm_layers, mode='sum')
    outputs = Activation('sigmoid', name='outputs')(flatten)
    model = Model(input=inputs, output=outputs)
    model.compile(optimizer=solver, loss='binary_crossentropy')
    return model
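# A minimal usage sketch for the factorization-machine builder above
# (hypothetical column cardinalities; max_features[c] is the vocabulary size
# of categorical column c). Each sample is one integer index per column; the
# model sums pairwise embedding dot products plus per-column linear terms.
fm_model = build_model([100, 50, 10], K=8, l2=1e-6, l2_fm=1e-6)
X_demo = [np.random.randint(0, n, size=(32, 1)) for n in (100, 50, 10)]
y_demo = np.random.randint(0, 2, size=(32, 1))
fm_model.fit(X_demo, y_demo, batch_size=16)  # epoch kwarg name differs across Keras versions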
def dnn_ce(embedding_init, embedding_size, vocab_size, use_embedding,
           rel_init, rel_embed_size, rel_vocab_size, l2, hidden_units,
           hidden_activation, batch_norm):
    # TODO(kudkudak): Add scaling
    embedding_args = {}
    if use_embedding:
        embedding_args['weights'] = [embedding_init]
    embedding_layer = Embedding(vocab_size, embedding_size,
                                embeddings_regularizer=l2_reg(l2),
                                trainable=True, **embedding_args)
    rel_embedding_layer = Embedding(rel_vocab_size, rel_embed_size,
                                    embeddings_initializer=RandomUniform(-rel_init, rel_init),
                                    embeddings_regularizer=l2_reg(l2),
                                    trainable=True)

    rel_input = Input(shape=(1,), dtype='int32', name='rel')
    rel = rel_embedding_layer(rel_input)
    rel = Flatten()(rel)

    head_input = Input(shape=(None,), dtype='int32', name='head')
    head_mask_input = Input(shape=(None,), dtype='float32', name='head_mask')
    head = embedding_layer(head_input)
    head_avg = MaskAvg(output_shape=(embedding_size,))([head, head_mask_input])

    tail_input = Input(shape=(None,), dtype='int32', name='tail')
    tail_mask_input = Input(shape=(None,), dtype='float32', name='tail_mask')
    tail = embedding_layer(tail_input)
    tail_avg = MaskAvg(output_shape=(embedding_size,))([tail, tail_mask_input])

    vin = Concatenate(axis=1)([head_avg, tail_avg, rel])
    u = Dense(hidden_units, kernel_initializer='random_normal')(vin)
    u = Activation(hidden_activation)(u)
    output = Dense(1, kernel_initializer='random_normal',
                   kernel_regularizer=l2_reg(l2))(u)
    if batch_norm:
        output = BatchNormalization()(output)
    output = Activation('sigmoid')(output)

    model = Model([rel_input, head_input, head_mask_input,
                   tail_input, tail_mask_input], [output])
    model.summary()
    return model
def create_model(nb_filter1=16, nb_filter2=32, activation1='relu',
                 l2_weight1=0.0, l2_weight2=0.0, dropout_rate=0.3,
                 optimizer='adam', hidden_dims1=112, hidden_dims2=56):
    filter_length = 1
    print('Build model...')
    model = Sequential()
    model.add(Convolution1D(nb_filter=nb_filter1,
                            filter_length=filter_length,
                            init='glorot_normal',
                            border_mode='valid',
                            activation=activation1,
                            subsample_length=1,
                            W_regularizer=l2_reg(l2_weight1),
                            input_shape=(10, 112)))
    model.add(Convolution1D(nb_filter=nb_filter2,
                            filter_length=1,
                            init='glorot_normal',
                            border_mode='valid',
                            subsample_length=1,
                            W_regularizer=l2_reg(l2_weight2),
                            activation=activation1))
    # model.add(Reshape((nb_filter2 * 10,)))
    model.add(Flatten())
    model.add(Dense(hidden_dims1))
    model.add(Activation(activation1))
    model.add(Dropout(dropout_rate))
    model.add(Dense(hidden_dims2))
    model.add(Activation(activation1))
    model.add(Dropout(dropout_rate))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])
    # model.summary()
    return model
def model_20161223_fnn_v1(feature_size):
    # select features
    # fields = feature_size.keys()
    FFM_L2 = 0.0000001
    FFM_DIM = 5
    fields = [
        'ad_id_fact',
        'uuid',
        'leak',
        'weekday',
        'day',
        'hour',
        'geo_1',
        'geo_2',
        'geo_3',
        'geo_location',
        'platform',
        'advertiser_id',
        'campaign_id',
        'cat_1',
        'cat_2',
        'source_id',
        'topic_1',
        'topic_2',
        'topic_3',
        'topic_4',
        'topic_5',
        'topic_num',
    ]

    # get model
    print('Create model input')
    model_inputs = {}
    fnn_layers = []
    for field in fields:
        model_inputs[field] = Input(shape=(1,), dtype='int32', name='input_' + field)
        embed = Flatten()(Embedding(
            feature_size[field] + 1,
            FFM_DIM,
            input_length=1,
            name='embed_{}'.format(field),
            W_regularizer=l2_reg(FFM_L2),
        )(model_inputs[field]))
        fnn_layers.append(embed)

    concat_embed = merge(fnn_layers, mode='concat')
    dense = Dropout(0.2)(Dense(1024, activation='tanh')(concat_embed))
    dense = Dropout(0.2)(Dense(1024, activation='relu')(dense))
    dense = Dropout(0.2)(Dense(512, activation='relu')(dense))
    dense = SReLU()(dense)
    output = Dense(1, activation='sigmoid')(dense)

    print('compile model')
    input_field = list(model_inputs.keys())
    model = Model(input=[model_inputs[field] for field in input_field], output=output)
    optimizer = Adadelta(lr=0.1, rho=0.9)
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    print(model.summary())
    return input_field, model
def build_model(conf, K=8, solver="adam", l2=0.0, l2_fm=0.0):
    inputs = []
    flatten_layers = []
    for c, m, d in conf[1]:
        inputs_c = Input(shape=(1,), dtype="int32", name="input_%s" % c)
        embed_c = Embedding(m, K, input_length=1,
                            name="embed_%s" % c,
                            W_regularizer=l2_reg(l2_fm))(inputs_c)
        flatten_c = Flatten()(embed_c)
        inputs.append(inputs_c)
        flatten_layers.append(flatten_c)

    fm_layers = []
    for emb1, emb2 in itertools.combinations(flatten_layers, 2):
        dot_layer = merge([emb1, emb2], mode="dot", dot_axes=1)
        fm_layers.append(dot_layer)

    for c in conf[0]:
        embed_c = Embedding(c, 1, input_length=1,
                            name="linear_%s" % c,
                            W_regularizer=l2_reg(l2))(inputs[c])
        flatten_c = Flatten()(embed_c)
        fm_layers.append(flatten_c)

    flatten = merge(fm_layers, mode="sum")
    outputs = Activation("sigmoid", name="outputs")(flatten)
    model = Model(input=inputs, output=outputs)
    model.compile(optimizer=solver, loss="binary_crossentropy")
    return model
def linear(output_size, activation=None, bn=False, l2=0.0, input_dim=None):
    if l2 == 0.0:
        sequence = [Dense(output_size, input_dim=input_dim)]
    else:
        sequence = [Dense(output_size, W_regularizer=l2_reg(l2), input_dim=input_dim)]
    if bn:
        sequence += [BatchNormalization()]
    if activation is not None:
        sequence += [Activation(activation)]
    return sequence
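# A minimal sketch of how linear() composes into a Sequential model
# (hypothetical layer sizes): each call returns a plain list of layers, so
# blocks can be concatenated with `+` before being handed to Sequential.
mlp = Sequential(
    linear(128, activation='relu', bn=True, l2=1e-5, input_dim=64)
    + linear(1, activation='sigmoid')
)
mlp.compile(optimizer='adam', loss='binary_crossentropy')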
def getModel():
    model = Sequential()
    model.add(Lambda(lambda x: x / 255.0 - 0.5, input_shape=(64, 64, 3)))
    model.add(Convolution2D(24, (5, 5), strides=(2, 2), activation="relu",
                            kernel_regularizer=l2_reg(0.0001)))
    model.add(Convolution2D(36, (5, 5), strides=(2, 2), activation="relu",
                            kernel_regularizer=l2_reg(0.0001)))
    model.add(Convolution2D(48, (5, 5), strides=(2, 2), activation="relu",
                            kernel_regularizer=l2_reg(0.0001)))
    model.add(Convolution2D(64, (3, 3), activation="relu",
                            kernel_regularizer=l2_reg(0.0001)))
    model.add(Convolution2D(64, (3, 3), activation="relu",
                            kernel_regularizer=l2_reg(0.0001)))
    model.add(Flatten())
    model.add(Dense(120, kernel_regularizer=l2_reg(0.0001)))
    model.add(Dropout(0.5))
    model.add(Dense(50, kernel_regularizer=l2_reg(0.0001)))
    model.add(Dropout(0.5))
    model.add(Dense(10, kernel_regularizer=l2_reg(0.0001)))
    model.add(Dropout(0.5))
    model.add(Dense(1, kernel_regularizer=l2_reg(0.0001)))
    model.summary()
    return model
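# A minimal training sketch for getModel() (hypothetical data and optimizer
# choice; the single linear output suggests a regression target such as a
# steering angle, so mean squared error is assumed here, not taken from the
# original code).
steer_model = getModel()
steer_model.compile(optimizer='adam', loss='mse')
X_imgs = np.random.rand(8, 64, 64, 3)
y_angles = np.random.uniform(-1, 1, size=(8, 1))
steer_model.fit(X_imgs, y_angles, batch_size=4)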
def _init_graph(self):
    np.random.seed(self.seed)
    tf.set_random_seed(self.seed)

    self.feat_index = Input(shape=(self.field_size,))  # None*F
    self.feat_value = Input(shape=(self.field_size,))  # None*F

    self.embeddings = Embedding(self.feature_size, self.k,
                                name='feature_embeddings',
                                embeddings_regularizer=l2_reg(self.l2_fm))(self.feat_index)  # None*F*k
    feat_value = Reshape((self.field_size, 1))(self.feat_value)  # None*F*1
    self.embeddings = Multiply()([self.embeddings, feat_value])  # None*F*8

    ###----first order------######
    self.y_first_order = Embedding(self.feature_size, 1,
                                   name='feature_bias',
                                   embeddings_regularizer=l2_reg(self.l2))(self.feat_index)  # None*F*1
    self.y_first_order = Multiply()([self.y_first_order, feat_value])  # None*F*1
    self.y_first_order = MySumLayer(axis=1)(self.y_first_order)  # None*1
    self.y_first_order = Dropout(self.dropout_keep_fm[0],
                                 seed=self.seed)(self.y_first_order)

    ###------second order term-------###
    # sum_square part
    self.summed_feature_emb = MySumLayer(axis=1)(self.embeddings)  # None*k
    self.summed_feature_emb_squred = Multiply()(
        [self.summed_feature_emb, self.summed_feature_emb])  # None*k

    # square_sum part
    self.squared_feature_emb = Multiply()(
        [self.embeddings, self.embeddings])  # None*F*k
    self.squared_sum_feature_emb = MySumLayer(axis=1)(
        self.squared_feature_emb)  # None*k

    # second order
    self.y_second_order = Subtract()(
        [self.summed_feature_emb_squred, self.squared_sum_feature_emb])  # None*k
    self.y_second_order = Lambda(lambda x: x * 0.5)(self.y_second_order)  # None*k
    self.y_second_order = MySumLayer(axis=1)(self.y_second_order)  # None*1
    self.y_second_order = Dropout(self.dropout_keep_fm[1],
                                  seed=self.seed)(self.y_second_order)

    # deep
    self.y_deep = Reshape((self.field_size * self.k,))(self.embeddings)  # None*(F*k)
    for i in range(0, len(self.deep_layers)):
        self.y_deep = Dense(self.deep_layers[i], activation='relu')(self.y_deep)
        self.y_deep = Dropout(self.dropout_keep_deep[i],
                              seed=self.seed)(self.y_deep)  # None*32

    # deepFM
    if self.use_fm and self.use_deep:
        self.concat_y = Concatenate()(
            [self.y_first_order, self.y_second_order, self.y_deep])
    elif self.use_fm:
        self.concat_y = Concatenate()(
            [self.y_first_order, self.y_second_order])
    elif self.use_deep:
        self.concat_y = self.y_deep

    self.y = Dense(1, activation='sigmoid', name='main_output')(self.concat_y)  # None*1

    self.model = Model(inputs=[self.feat_index, self.feat_value],
                       outputs=self.y, name='model')

    if self.optimizer_type == 'adam':
        self.optimizer = Adam(lr=self.learning_rate, decay=0.1)

    if self.loss_type == 'ranking_logloss':
        self.loss = binary_crossentropy_with_ranking
        print('use ranking_logloss')
    elif self.loss_type == 'logloss':
        self.loss = 'binary_crossentropy'
        print('use logloss')
    elif self.loss_type == 'mse':
        self.loss = 'mean_squared_error'
        print('use mse')

    if self.eval_metric == 'auc':
        self.metrics = auc
    else:
        self.metrics = self.eval_metric

    self.model.compile(optimizer=self.optimizer, loss=self.loss,
                       metrics=[self.metrics])
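# A small numpy check of the FM second-order identity used in _init_graph
# above: the pairwise interaction term sum_{i<j} v_i * v_j (elementwise over
# the k embedding dimensions) equals 0.5 * ((sum_i v_i)^2 - sum_i v_i^2),
# which is why the "sum_square" and "square_sum" parts are subtracted and
# halved. Sizes here are arbitrary placeholders.
V = np.random.rand(5, 8)  # F=5 field embeddings of size k=8
pairwise = sum(V[i] * V[j] for i in range(5) for j in range(i + 1, 5))
trick = 0.5 * (V.sum(axis=0) ** 2 - (V ** 2).sum(axis=0))
assert np.allclose(pairwise, trick)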
def _load_model(self):
    inputs = Input(self.input_shape)
    bn0 = BatchNormalization()(inputs)

    conv1 = Conv2D(64, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(bn0)
    bn1 = BatchNormalization()(conv1)
    ac1 = Activation('relu')(bn1)
    mp1 = MaxPooling2D(pool_size=(2, 2), strides=2)(ac1)

    conv2 = Conv2D(128, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(mp1)
    bn2 = BatchNormalization()(conv2)
    ac2 = Activation('relu')(bn2)
    mp2 = MaxPooling2D(pool_size=(2, 2), strides=2)(ac2)

    conv3 = Conv2D(256, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(mp2)
    bn3 = BatchNormalization()(conv3)
    ac3 = Activation('relu')(bn3)
    mp3 = MaxPooling2D(pool_size=(2, 2), strides=2)(ac3)

    conv4 = Conv2D(512, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(mp3)
    bn4 = BatchNormalization()(conv4)
    ac4 = Activation('relu')(bn4)
    mp4 = MaxPooling2D(pool_size=(2, 2), strides=2)(ac4)

    up5 = UpSampling2D((2, 2))(mp4)
    conv5 = Conv2D(512, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(up5)
    bn5 = BatchNormalization()(conv5)
    ac5 = Activation('relu')(bn5)

    up6 = UpSampling2D((2, 2))(ac5)
    conv6 = Conv2D(256, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(up6)
    bn6 = BatchNormalization()(conv6)
    ac6 = Activation('relu')(bn6)

    up7 = UpSampling2D((2, 2))(ac6)
    conv7 = Conv2D(128, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(up7)
    bn7 = BatchNormalization()(conv7)
    ac7 = Activation('relu')(bn7)

    up8 = UpSampling2D((2, 2))(ac7)
    conv8 = Conv2D(64, (7, 7), padding='same',
                   kernel_regularizer=l2_reg(0.001))(up8)
    bn8 = BatchNormalization()(conv8)
    ac8 = Activation('relu')(bn8)

    conv9 = Conv2D(1, (7, 7), activation='sigmoid', padding='same',
                   kernel_regularizer=l2_reg(0.001))(ac8)
    rh1 = Reshape((self.input_shape[0], self.input_shape[1]))(conv9)

    self.model = Model(inputs=inputs, outputs=rh1)
def build_simple_mlp(input_shape=(3, 32, 32), dropout=0.0, l2=0.,
                     training=None, n_filters=32, activation="relu",
                     n_dense=128, kernel_size=3, n1=1, n2=1, nb_classes=10,
                     bn=False, use_bias=True, init="glorot_uniform"):
    inputs = Input(shape=input_shape)
    x = inputs

    if training is False:
        prefix = "inference_"
    else:
        prefix = ""

    for id in range(n1):
        prefix_column = str(id) if id > 0 else ""
        x = Conv2D(n_filters, (kernel_size, kernel_size), padding='same',
                   kernel_regularizer=l2_reg(l2),
                   name=prefix_column + prefix + "conv1",
                   use_bias=use_bias,
                   kernel_initializer=init)(x)
        if bn:
            x = BatchNormalization(axis=3,
                                   name=prefix_column + prefix + "bn1")(x, training=training)
        x = Activation(activation, name=prefix_column + "act_1")(x)
        x = Conv2D(n_filters, (kernel_size, kernel_size),
                   kernel_regularizer=l2_reg(l2),
                   use_bias=use_bias,
                   name=prefix_column + prefix + "conv2",
                   kernel_initializer=init)(x)
        if bn:
            x = BatchNormalization(axis=3,
                                   name=prefix_column + prefix + "bn2")(x, training=training)
        x = Activation(activation, name=prefix_column + "act_2")(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    for id in range(n2):
        prefix_column = str(id) if id > 0 else ""
        x = Conv2D(n_filters * 2, (kernel_size, kernel_size),
                   use_bias=use_bias, padding='same',
                   kernel_regularizer=l2_reg(l2),
                   name=prefix_column + prefix + "conv3",
                   kernel_initializer=init)(x)
        if bn:
            x = BatchNormalization(axis=3,
                                   name=prefix_column + prefix + "bn3")(x, training=training)
        x = Activation(activation, name=prefix_column + "act_3")(x)
        x = Conv2D(n_filters * 2, (kernel_size, kernel_size),
                   use_bias=use_bias,
                   kernel_regularizer=l2_reg(l2),
                   name=prefix_column + prefix + "conv4",
                   kernel_initializer=init)(x)
        if bn:
            x = BatchNormalization(axis=3,
                                   name=prefix_column + prefix + "bn4")(x, training=training)
        x = Activation(activation, name=prefix_column + "act_4")(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Flatten()(x)
    x = Dense(n_dense,
              kernel_regularizer=l2_reg(l2),
              use_bias=use_bias,
              name=prefix + "dense2",
              kernel_initializer=init)(x)
    if bn:
        x = BatchNormalization(name=prefix + "bn5")(x, training=training)
    x = Activation(activation, name="act_5")(x)

    # Post act
    if dropout > 0:
        x = Dropout(dropout)(x, training=training)

    x = Dense(nb_classes, activation="linear", use_bias=use_bias,
              name=prefix + "pre_softmax", kernel_initializer=init)(x)
    x = Activation(activation="softmax", name=prefix + "post_softmax")(x)

    model = Model(inputs=[inputs], outputs=[x])
    setattr(model, "steerable_variables", {})
    return model
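# A minimal usage sketch for build_simple_mlp (hypothetical CIFAR-10-like
# setup). Note the channels-last shape here: the signature's default of
# (3, 32, 32) sits uneasily with the BatchNormalization(axis=3) calls above,
# which assume channels last.
cnn = build_simple_mlp(input_shape=(32, 32, 3), dropout=0.3, l2=1e-4,
                       nb_classes=10, bn=True)
cnn.compile(optimizer='adam', loss='categorical_crossentropy',
            metrics=['accuracy'])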
def build_model(max_features, continue_cols, K=8, solver='adam', l2=0.0,
                l2_fm=0.0, is_self=False):
    np.random.seed(2018)
    inputs = []
    flatten_layers = []
    columns = range(len(max_features))

    ###------second order term-------###
    for c in columns:
        # print(c, max_features[c])
        inputs_c = Input(shape=(1,), dtype='int32', name='input_%s' % (c))
        num_c = max_features[c]
        inputs.append(inputs_c)
        # print(num_c, K, c)
        embed_c = Embedding(num_c, K, input_length=1,
                            name='embed_%s' % (c),
                            W_regularizer=l2_reg(l2_fm))(inputs_c)
        # print(embed_c.get_shape(), '---')
        # flatten_c = Flatten()(embed_c)
        flatten_c = Reshape((K,))(embed_c)
        flatten_layers.append(flatten_c)

    inputs_dict = []
    continue_cols_columns = range(len(continue_cols))
    for col in continue_cols_columns:
        # print(col, continue_cols[col])
        inputs_c = Input(shape=(1,), dtype='float', name='input_sec_%s' % (col))
        inputs.append(inputs_c)
        inputs_c = BatchNormalization(name='BN_%s' % (col))(inputs_c)
        inputs_dict.append(inputs_c)
        inputs_cK = MyLayer(output_dim=K)(inputs_c)
        flatten_layers.append(inputs_cK)  # F * None * K

    summed_features_emb = add(flatten_layers)  # None * K
    summed_features_emb_square = multiply(
        [summed_features_emb, summed_features_emb])  # None * K

    squared_features_emb = []
    for layer in flatten_layers:
        squared_features_emb.append(multiply([layer, layer]))
    squared_sum_features_emb = add(squared_features_emb)  # None * K

    subtract_layer = Lambda(lambda inputs: inputs[0] - inputs[1],
                            output_shape=lambda shapes: shapes[0])
    y_second_order = subtract_layer(
        [summed_features_emb_square, squared_sum_features_emb])
    y_second_order = Lambda(lambda x: x * 0.5)(y_second_order)
    y_second_order = Dropout(0.9, seed=2018)(y_second_order)

    ###----first order------######
    fm_layers = []
    for c in columns:
        num_c = max_features[c]
        embed_c = Embedding(num_c, 1, input_length=1,
                            name='linear_%s' % (c),
                            W_regularizer=l2_reg(l2))(inputs[c])
        flatten_c = Flatten()(embed_c)
        fm_layers.append(flatten_c)

    for col in continue_cols_columns:
        inputs_c = MyLayer(output_dim=1)(inputs_dict[col])
        # layer.build(inputs_c.get_shape().as_list())
        # inputs_c = RepeatVector(K)(inputs_c)
        # inputs_c = layer.call(inputs_c)
        fm_layers.append(inputs_c)  # None * 1

    y_first_order = add(fm_layers)
    y_first_order = BatchNormalization()(y_first_order)
    y_first_order = Dropout(0.8, seed=2018)(y_first_order)

    # deep
    y_deep = concatenate(flatten_layers)  # None * (F*K)
    y_deep = Dense(32)(y_deep)
    y_deep = Activation('relu', name='output_1')(y_deep)
    y_deep = Dropout(rate=0.5, seed=2012)(y_deep)
    y_deep = Dense(32)(y_deep)
    y_deep = Activation('relu', name='output_2')(y_deep)
    y_deep = Dropout(rate=0.5, seed=2012)(y_deep)

    concat_input = concatenate([y_first_order, y_second_order, y_deep], axis=1)
    # concat_input = Dense(16)(concat_input)
    # concat_input = Activation('relu', name='concat')(concat_input)
    # y_deep = Dropout(rate=0.5, seed=2012)(y_deep)
    # concat_input = Dropout(rate=0.5, seed=2012)(concat_input)
    outputs = Dense(1, activation='sigmoid', name='main_output')(concat_input)

    model = Model(inputs=inputs, outputs=outputs, name='model')
    solver = Adam(lr=0.01, decay=0.1)
    if is_self:
        model.compile(optimizer=solver,
                      loss=binary_crossentropy_with_ranking,
                      metrics=[auc, log_loss])
    else:
        model.compile(optimizer=solver,
                      loss='binary_crossentropy',
                      metrics=[auc, log_loss])
    # model.fit(X, y, batch_size=batch_size, validation_data=(vali_X, vali_y), epochs=epochs)
    return model
input_layer = Input(shape=(80, 320, 3))
input_sides = Input(shape=(1,))
side_factor = Dense(1, use_bias=False,
                    kernel_initializer=const_init(side_init))(input_sides)
conv1 = conv_block(input_layer, 3, 16, reg_rate)
conv2 = conv_block(conv1, 5, 32, reg_rate)
conv3 = conv_block(conv2, 5, 64, reg_rate)
conv4_c = conv_block(conv3, 5, 128, reg_rate)
conv4_p = MaxPooling2D()(conv3)
conv4 = Concatenate()([conv4_c, conv4_p])
flatten = Flatten()(conv4)
hidden_1 = Dropout(dropout_rate)(flatten)

# simple trigonometry to learn side_factor
angle_layer = Dense(1,
                    kernel_regularizer=None if reg_rate is None else l2_reg(reg_rate),
                    bias_regularizer=None if reg_rate is None else l2_reg(reg_rate))(hidden_1)
angle_scaling = Dense(1, use_bias=False, trainable=False,
                      kernel_initializer=const_init(1 / mult_factor * (np.pi * 25 / 180)))(angle_layer)
tan_angle = Lambda(tan_layer)(angle_scaling)
tan_final = Add()([tan_angle, side_factor])
angle_final = Lambda(atan_layer)(tan_final)
output_layer = Dense(1, use_bias=False, trainable=False,
                     kernel_initializer=const_init(mult_factor / (np.pi * 25 / 180)))(angle_final)

model = Model(inputs=[input_layer, input_sides], outputs=output_layer)
model.summary()
print(((y_train * mult_factor) ** 2).mean(),
      ' '.join(str(((y_valid * mult_factor) ** 2).mean()) for y_valid in ys_valid))
def factorized(embedding_init, embedding_size, vocab_size, use_embedding, l2_a,
               rel_vocab_size, rel_init, bias_init, hidden_units,
               hidden_activation, merge, merge_weight, batch_norm, bias_trick,
               use_tailrel=True, use_headrel=True, emb_drop=0.0,
               trainable_word_embeddings=True, use_headtail=True,
               share_mode=False, l2_b=0):
    """
    score(head, rel, tail) = s1(head, rel) + s2(rel, tail) + s3(tail, head)

    s1(head, rel) = <Ahead, Brel> = headA^TBrel
    """
    embedding_args = {}
    if use_embedding:
        logging.info("Loading weights")
        embedding_args['weights'] = [embedding_init]
    embedding_layer = Embedding(vocab_size, embedding_size,
                                embeddings_regularizer=l2_reg(l2_a),
                                trainable=trainable_word_embeddings,
                                **embedding_args)
    embedding_drop_layer = Dropout(emb_drop)
    rel_embedding_layer = Embedding(rel_vocab_size, embedding_size,
                                    embeddings_regularizer=l2_reg(l2_a),
                                    embeddings_initializer=RandomUniform(-rel_init, rel_init),
                                    trainable=True)

    dense_args = {"kernel_regularizer": l2_reg(l2_b)}
    if share_mode == 0:
        # score = <Ahead, Btail> + <Chead, Drel> + <Etail, Frel>
        dense_layer_head1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_head2 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_rel1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_rel2 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_tail1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_tail2 = Dense(hidden_units, activation=hidden_activation, **dense_args)
    elif share_mode == 1:
        # score = <Ahead, Btail> + <Ahead, Brel> + <Btail, Arel>
        dense_layer_head1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_head2 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_rel1 = dense_layer_head1
        dense_layer_rel2 = dense_layer_head2
        dense_layer_tail1 = dense_layer_head1
        dense_layer_tail2 = dense_layer_head2
    elif share_mode == 3:
        # score = <Ahead, Atail> + <Ahead, Arel> + <Atail, Arel>
        dense_layer_head1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_head2 = dense_layer_head1
        dense_layer_rel1 = dense_layer_head1
        dense_layer_rel2 = dense_layer_head2
        dense_layer_tail1 = dense_layer_head1
        dense_layer_tail2 = dense_layer_head2
    elif share_mode == 4:
        # score = <Ahead, Atail> + <Ahead, Brel> + <Atail, Brel>
        dense_layer_head1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_head2 = lambda x: x
        # NOTE: the original referenced an undefined `l2` here; l2_a is used,
        # matching the rel_embedding_layer defined above.
        rel_embedding_layer = Embedding(rel_vocab_size, hidden_units,
                                        embeddings_regularizer=l2_reg(l2_a),
                                        embeddings_initializer=RandomUniform(-rel_init, rel_init),
                                        trainable=True)
        dense_layer_rel1 = lambda x: x
        dense_layer_rel2 = dense_layer_head1
        dense_layer_tail1 = dense_layer_head1
        dense_layer_tail2 = dense_layer_head1
    elif share_mode == 5:
        # score = <Ahead, Atail> + <Bhead, Crel> + <Dtail, Crel>
        dense_layer_head1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_head2 = lambda x: x
        rel_embedding_layer = Embedding(rel_vocab_size, hidden_units,
                                        embeddings_regularizer=l2_reg(l2_a),
                                        embeddings_initializer=RandomUniform(-rel_init, rel_init),
                                        trainable=True)
        dense_layer_rel1 = lambda x: x
        dense_layer_rel2 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_tail1 = Dense(hidden_units, activation=hidden_activation, **dense_args)
        dense_layer_tail2 = dense_layer_tail1
    else:
        raise NotImplementedError()

    head_input = Input(shape=(None,), dtype='int32', name='head')
    head_mask_input = Input(shape=(None,), dtype='float32', name='head_mask')
    head = embedding_layer(head_input)
    head = embedding_drop_layer(head)
    head_avg = MaskAvg(output_shape=(embedding_size,))([head, head_mask_input])

    tail_input = Input(shape=(None,), dtype='int32', name='tail')
    tail_mask_input = Input(shape=(None,), dtype='float32', name='tail_mask')
    tail = embedding_layer(tail_input)
    tail = embedding_drop_layer(tail)
    tail_avg = MaskAvg(output_shape=(embedding_size,))([tail, tail_mask_input])

    rel_input = Input(shape=(1,), dtype='int32', name='rel')
    rel = rel_embedding_layer(rel_input)
    rel = Flatten()(rel)

    head_rel = Dot(1, normalize=True)([dense_layer_head1(head_avg), dense_layer_head2(rel)])
    rel_tail = Dot(1, normalize=True)([dense_layer_rel1(rel), dense_layer_rel2(tail_avg)])
    head_tail = Dot(1, normalize=True)([dense_layer_tail1(head_avg), dense_layer_tail2(tail_avg)])

    if merge_weight:
        head_rel = Dense(1, kernel_initializer='ones')(head_rel)
        rel_tail = Dense(1, kernel_initializer='ones')(rel_tail)
        head_tail = Dense(1, kernel_initializer='ones')(head_tail)

    to_merge = []
    if use_headtail:
        to_merge.append(head_tail)
    if use_headrel:
        to_merge.append(head_rel)
    if use_tailrel:
        to_merge.append(rel_tail)

    if len(to_merge) > 1:
        if merge == 'add':
            score = Add()(to_merge)
        elif merge == 'max':
            score = Maximum()([head_rel, rel_tail, head_tail])
        elif merge == 'avg':
            score = Average()([head_rel, rel_tail, head_tail])
        # elif merge == "rel_att":
        #     which_rel = Dense(dim=3)
        #     def picker_fnc(xxx):
        #         a, b, c, rel = xxx
        #         picker = Lambda(output_shape=(1,))
        else:
            raise NotImplementedError('Merge function ', merge,
                                      ' must be one of ["add", "max", "avg"]')
    else:
        score = to_merge[0]

    if bias_trick:
        score = Dense(1,
                      kernel_initializer='ones',
                      bias_initializer=Constant(bias_init),
                      kernel_regularizer=l2_reg(l2_a),
                      trainable=True)(score)
    else:
        score = Dense(1,
                      kernel_regularizer=l2_reg(l2_a),
                      trainable=True)(score)

    if batch_norm:
        score = BatchNormalization()(score)
    output = Activation(activation='sigmoid')(score)

    model = Model([rel_input, head_input, head_mask_input,
                   tail_input, tail_mask_input], [output])
    model.summary()
    return model
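# A minimal input sketch for factorized() (hypothetical shapes and vocab
# sizes). head and tail are padded token-id sequences; the *_mask inputs are
# 1.0 over real tokens and 0.0 over padding, which is what MaskAvg (a custom
# layer assumed to be defined elsewhere in this codebase) averages over.
fact_model = factorized(
    embedding_init=None, embedding_size=32, vocab_size=1000,
    use_embedding=False, l2_a=1e-6, rel_vocab_size=10, rel_init=0.05,
    bias_init=0.0, hidden_units=32, hidden_activation='tanh', merge='add',
    merge_weight=False, batch_norm=False, bias_trick=False, share_mode=0)
batch = {
    'rel': np.array([[1], [2]]),
    'head': np.array([[5, 7, 0], [9, 0, 0]]),
    'head_mask': np.array([[1., 1., 0.], [1., 0., 0.]]),
    'tail': np.array([[3, 4, 8], [2, 6, 0]]),
    'tail_mask': np.array([[1., 1., 1.], [1., 1., 0.]]),
}
preds = fact_model.predict(batch)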
def build_resnet(n, l2, nb_classes, pool_size=8, n_stages=3, init_scale=1.0,
                 input_dim=[3, 32, 32], k=1, normalization="bn",
                 resnet_dropout=0.2, resnet_activation='relu', training=None,
                 seed=777):
    """
    Builds "original" resnet.
    """
    if training is False:
        prefix_name = "inference_"
    else:
        prefix_name = ""

    F_N = 0
    F_blocks, F_states, states = [], [], []
    init_fnc = he_normal_scaled(init_scale)

    inputs = Input(shape=input_dim)
    x = Conv2D(k * 16, (3, 3), padding='same',
               name=prefix_name + "first_conv",
               data_format=DATA_FORMAT,
               kernel_regularizer=l2_reg(l2),
               kernel_initializer=init_fnc)(inputs)
    logging.info(DATA_FORMAT)
    logging.info("First x shape " + str(K.int_shape(x)))

    id_layer = 0
    for stage in range(n_stages):
        # Configure stage
        n_filters = k * ((2 ** stage) * 16)
        n_layers_stage = n
        logging.info("stage {} n_filters {} n_layers {}".format(
            stage, n_filters, n_layers_stage))

        for i in range(n_layers_stage):
            F_block = _residual_block(nb_filters=n_filters,
                                      scale_init=init_scale,
                                      normalization=normalization,
                                      prefix_name=prefix_name,
                                      id="{}_{}".format(i, stage),
                                      activation=resnet_activation,
                                      subsample=1 if (i > 0 or stage == 0) else 2,
                                      l2=l2,
                                      dropout=resnet_dropout)
            x_next, residue, x = F_block.call(x, training=training)

            # Book-keeping
            F_N += 1
            F_blocks.append(F_block)
            states.append(x)
            F_states.append(residue)
            x = x_next
            logging.info(K.int_shape(x))
            id_layer += 1

    # Last state (so there are F_N + 1 states)
    states.append(x)

    if normalization == "bn":
        post_bn = BatchNormalization(
            axis=3 if DATA_FORMAT == "channels_last" else 1,
            name=prefix_name + "post_bn")
    elif normalization == "none":
        post_bn = lambda x, training: x
    else:
        raise NotImplementedError()

    post_act = Activation('relu', name=prefix_name + "post_act")
    logging.info(K.int_shape(x))
    post_pool = AveragePooling2D(pool_size=(pool_size, pool_size),
                                 strides=None, padding='valid',
                                 data_format=DATA_FORMAT,
                                 name=prefix_name + "post_avg")
    post_flatten = Flatten()
    pre_softmax = Dense(nb_classes, activation='linear',
                        kernel_regularizer=l2_reg(l2),
                        name=prefix_name + "pre_softmax")
    post_softmax = Activation(activation='softmax',
                              name=prefix_name + "post_softmax")
    prediction_layer = lambda xx: post_softmax(pre_softmax(xx))
    predictions = prediction_layer(
        post_flatten(post_pool(post_act(post_bn(x, training=training)))))

    model = Model(input=inputs, output=predictions)
    meta = {
        "F_states": F_states,
        "states": states,
        "F_blocks": F_blocks,
        "F_N": F_N,
        "postnet": lambda xx: prediction_layer(
            post_flatten(post_pool(post_act(post_bn(xx)))))
    }
    setattr(model, "steerable_variables", {})
    return model, meta
def __init__(self, nb_filters=16, subsample=1, l2=0.0, id=0,
             activation="relu", scale_init=1.0, dropout=0.5,
             normalization="bn", identity=True, prefix_name=""):
    self.layers = []
    self.prefix_name = prefix_name
    self.id = id

    if normalization == "bn":
        y = BatchNormalization(
            axis=3 if DATA_FORMAT == "channels_last" else 1,
            name=prefix_name + "block_bn_1_id_" + str(id))
        self.layers.append(y)
    elif normalization == "none":
        pass
    else:
        raise NotImplementedError()

    if activation == "relu":
        y = Activation('relu', name=prefix_name + 'act_0_id_' + str(id))
    elif activation == "tanh":
        y = Activation("tanh", name=prefix_name + 'act_0_id_' + str(id))
    else:
        raise NotImplementedError()
    self.layers.append(y)

    # TODO: Is seed avoided correctly?
    init_fnc = he_normal_scaled(scale_init)
    y = Conv2D(nb_filters, 3,
               strides=(subsample, subsample),
               kernel_regularizer=l2_reg(l2),
               name=prefix_name + "conv_1_id_" + str(id),
               padding='same',
               data_format=DATA_FORMAT,
               kernel_initializer=init_fnc)
    self.layers.append(y)

    if normalization == "bn":
        y = BatchNormalization(
            axis=3 if DATA_FORMAT == "channels_last" else 1,
            name=prefix_name + "block_bn_2_id_" + str(id))
        self.layers.append(y)
    elif normalization == "none":
        pass
    else:
        raise NotImplementedError()

    if activation == "relu":
        y = Activation('relu', name=prefix_name + 'act_1_id_' + str(id))
    elif activation == "tanh":
        y = Activation('tanh', name=prefix_name + 'act_1_id_' + str(id))
    else:
        raise NotImplementedError()
    self.layers.append(y)

    y = Dropout(dropout)
    self.layers.append(y)

    y = Conv2D(nb_filters, 3,
               strides=(1, 1),
               kernel_regularizer=l2_reg(l2),
               name=prefix_name + "conv_2_id_" + str(id),
               padding='same',
               data_format=DATA_FORMAT,
               kernel_initializer=init_fnc)
    self.layers.append(y)

    if subsample > 1:
        self.shortcut = Conv2D(nb_filters, (1, 1),
                               strides=(subsample, subsample),
                               name=prefix_name + "shortcut_id_" + str(id),
                               kernel_regularizer=l2_reg(l2),
                               kernel_initializer=init_fnc,
                               padding='same',
                               data_format=DATA_FORMAT)

    self.identity = identity
    self.id_bn = 0
    self.id_scaler = 0
def extended_standard_model(param_dict):
    """
    Extended standard model for text processing (deeper/more parameters)
    """
    model = Sequential()
    model.add(Embedding(input_dim=param_dict['alphabet'],
                        output_dim=1024,
                        input_length=param_dict['length_sen']))
    model.add(Dropout(rate=param_dict['drop_rate']))
    model.add(Conv1D(filters=128, kernel_size=20, activation=None,
                     kernel_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=param_dict['drop_rate']))
    model.add(Conv1D(filters=256, kernel_size=10, activation=None,
                     kernel_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=param_dict['drop_rate']))
    model.add(Conv1D(filters=512, kernel_size=5, activation=None,
                     kernel_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=param_dict['drop_rate']))
    model.add(Conv1D(filters=1024, kernel_size=3, activation=None,
                     kernel_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=param_dict['drop_rate']))
    model.add(LSTM(units=512, activation='tanh',
                   recurrent_activation='hard_sigmoid',
                   dropout=param_dict['drop_rate'],
                   recurrent_dropout=param_dict['drop_rate'],
                   return_sequences=True,
                   kernel_regularizer=l2_reg(param_dict['l2_reg']),
                   recurrent_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(LSTM(units=512, activation='tanh',
                   recurrent_activation='hard_sigmoid',
                   dropout=param_dict['drop_rate'],
                   recurrent_dropout=param_dict['drop_rate'],
                   return_sequences=True,
                   kernel_regularizer=l2_reg(param_dict['l2_reg']),
                   recurrent_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(LSTM(units=512, activation='tanh',
                   recurrent_activation='hard_sigmoid',
                   dropout=param_dict['drop_rate'],
                   recurrent_dropout=param_dict['drop_rate'],
                   kernel_regularizer=l2_reg(param_dict['l2_reg']),
                   recurrent_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(Dense(512, activation=None,
                    kernel_regularizer=l2_reg(param_dict['l2_reg'])))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(rate=param_dict['drop_rate']))
    model.add(Dense(5, activation='softmax',
                    kernel_regularizer=l2_reg(param_dict['l2_reg'])))
    return model
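# A hypothetical param_dict for extended_standard_model, illustrating the
# keys the builder reads (values are placeholders, not tuned settings).
demo_params = {
    'alphabet': 70,      # input vocabulary size
    'length_sen': 512,   # padded sequence length
    'drop_rate': 0.3,
    'l2_reg': 1e-5,
}
text_model = extended_standard_model(demo_params)
text_model.compile(optimizer='adam', loss='categorical_crossentropy')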
def simple_multimodal_mlp(config, training=None):
    """
    Builds simple MLP

    Follows https://arxiv.org/pdf/1501.00102.pdf
    """
    if training is False:
        prefix_name = "inference_"
    else:
        prefix_name = ""

    img_rows, img_cols = 28, 28
    num_classes = 10
    input_shape = (img_rows, img_cols, 1)
    # Integer division so shapes and slice indices stay ints under Python 3.
    segment_shape = (img_rows // 2, img_cols // 2, 1)
    seed_init = config['seed']
    freeze_modalities = eval(config.get("freeze_modalities", "[]"))

    # (28, 28, 1)
    x = Input(input_shape)

    def get_segment(x, id):
        """
        0 1
        2 3
        """
        if id == 0:
            return x[:, 0:(img_rows // 2), :][:, :, 0:(img_cols // 2)]
        elif id == 1:
            return x[:, 0:(img_rows // 2), :][:, :, (img_cols // 2):]
        elif id == 2:
            return x[:, (img_rows // 2):, :][:, :, 0:(img_cols // 2)]
        elif id == 3:
            return x[:, (img_rows // 2):, :][:, :, (img_cols // 2):]
        else:
            raise NotImplementedError()

    segments = [
        Flatten()(Lambda(get_segment, arguments={"id": id},
                         output_shape=segment_shape)(x))
        for id in range(4)
    ]

    steerable_variables = {}
    segments_h = []
    for h_id, h in enumerate(segments):
        for id in range(config['k']):
            seed_init += 1
            dense = Dense(config['dim'], activation='linear',
                          name=prefix_name + "dense_segment" + str(h_id) + "_" + str(id),
                          kernel_regularizer=l2_reg(config['l2']))
            if h_id in freeze_modalities:
                logging.warning("Freezing modality " + str(h_id))
                dense.trainable = False
            h = dense(h)
            if config['bn']:
                h = BatchNormalization(axis=1,
                                       name=prefix_name + "bn_" + str(h_id) + "_" + str(id))(h, training=training)
            h = Activation(config['activation'],
                           name=prefix_name + "act_" + str(h_id) + "_" + str(id))(h)
            h = Dropout(config['dropout'])(h, training=training)
        segments_h.append(h)

    h = merge(segments_h, mode="concat", concat_axis=1, name="merger_modalities")
    h = Dense(config['dim2'], activation='linear',
              name=prefix_name + "final_dense",
              kernel_regularizer=l2_reg(config['l2']))(h)
    if config['bn']:
        h = BatchNormalization(axis=1, name=prefix_name + "final_bn")(h, training=training)
    h = Activation(activation=config['activation'],
                   name=prefix_name + "final_dense_act")(h)
    h = Dropout(config['dropout'])(h, training=training)
    h = Dense(num_classes, name=prefix_name + "pre_softmax", activation="linear")(h)
    out = Activation("softmax", name=prefix_name + "final_softmax")(h)

    model = Model([x], out)
    setattr(model, "steerable_variables", steerable_variables)
    logger.info(steerable_variables)
    return model