def __init__(self):
    self.DATASET = 'restaurant'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 100
    self.LEARNING_RATE = 0.01
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'recurrent_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'bias_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'kernel_regularizer': regularizers.l2(0.001),
        'recurrent_regularizer': regularizers.l2(0.001),
        'bias_regularizer': regularizers.l2(0.001),
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 2
    self.BATCH_SIZE = 200
    self.ITERATION = 500

    self.texts_raw_indices, self.texts_left_indices, self.aspects_indices, self.texts_right_indices, \
    self.polarities_matrix, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(type=self.DATASET, mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    self.left_input = np.concatenate((self.texts_left_indices, self.aspects_indices), axis=1)
    self.right_input = np.concatenate((self.texts_right_indices, self.aspects_indices), axis=1)

    if os.path.exists('td_lstm_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('td_lstm_saved_model.h5')
    else:
        print('Build model...')
        inputs_l = Input(shape=(self.MAX_SEQUENCE_LENGTH + self.MAX_ASPECT_LENGTH,))
        inputs_r = Input(shape=(self.MAX_SEQUENCE_LENGTH + self.MAX_ASPECT_LENGTH,))
        Embedding_Layer = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                                    output_dim=self.EMBEDDING_DIM,
                                    input_length=self.MAX_SEQUENCE_LENGTH + self.MAX_ASPECT_LENGTH,
                                    weights=[self.embedding_matrix],
                                    trainable=False)
        x_l = Embedding_Layer(inputs_l)
        x_r = Embedding_Layer(inputs_r)
        x_l = LSTM(**self.LSTM_PARAMS)(x_l)
        x_r = LSTM(**self.LSTM_PARAMS, go_backwards=True)(x_r)
        x = Concatenate()([x_l, x_r])
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_l, inputs_r], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc'])
        # plot_model(model, to_file='model.png')
        self.model = model
def __init__(self,
             units,
             activation=None,
             is_base_trainable=True,
             is_diag_start_trainable=True,
             is_diag_end_trainable=True,
             use_bias=False,
             base_initializer='GlorotUniform',
             diag_start_initializer='optimized_uniform',
             diag_end_initializer='optimized_uniform',
             bias_initializer='zeros',
             base_regularizer=None,
             diag_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             base_constraint=None,
             diag_constraint=None,
             bias_constraint=None,
             **kwargs):
    super(Spectral, self).__init__(activity_regularizer=activity_regularizer, **kwargs)

    self.units = int(units) if not isinstance(units, int) else units
    self.activation = activations.get(activation)
    self.is_base_trainable = is_base_trainable
    self.is_diag_start_trainable = is_diag_start_trainable
    self.is_diag_end_trainable = is_diag_end_trainable
    self.use_bias = use_bias

    # 'optimized_uniform' initializers optimized by Buffoni and Giambagli
    if base_initializer == 'optimized_uniform':
        self.base_initializer = initializers.RandomUniform(-0.02, 0.02)
    else:
        self.base_initializer = initializers.get(base_initializer)
    if diag_start_initializer == 'optimized_uniform':
        self.diag_start_initializer = initializers.RandomUniform(-0.5, 0.5)
    else:
        self.diag_start_initializer = initializers.get(diag_start_initializer)
    if diag_end_initializer == 'optimized_uniform':
        self.diag_end_initializer = initializers.RandomUniform(-0.5, 0.5)
    else:
        self.diag_end_initializer = initializers.get(diag_end_initializer)
    self.bias_initializer = initializers.get(bias_initializer)

    self.base_regularizer = regularizers.get(base_regularizer)
    self.diag_regularizer = regularizers.get(diag_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    self.base_constraint = constraints.get(base_constraint)
    self.diag_constraint = constraints.get(diag_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
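A minimal usage sketch for the Spectral layer above, treating it as a drop-in Dense replacement; the dataset shape, layer sizes, and trainability flags are illustrative assumptions, not part of the original snippet.

# Hypothetical usage of the Spectral layer defined above (sizes are assumptions).
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    Spectral(200, activation='relu'),                             # base and diagonals trainable
    Spectral(10, activation='softmax', is_base_trainable=False),  # train eigenvalues only
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
model.summary()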
def build(self, input_shape):
    assert len(input_shape) >= 2
    input_dim = input_shape[1].value  # TF1-style TensorShape dimension

    if self.H == 'Glorot':
        self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units)))
        # print('Glorot H: {}'.format(self.H))
    if self.kernel_lr_multiplier == 'Glorot':
        self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units)))
        # print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier))

    self.kernel_constraint = Clip(-self.H, self.H)
    self.kernel_initializer = initializers.RandomUniform(-self.H, self.H)
    self.kernel = self.add_weight(shape=(input_dim, self.units),
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)

    if self.use_bias:
        self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier]
        # The original referenced self.output_dim, which this layer never sets;
        # the bias length must match the layer width, i.e. self.units.
        self.bias = self.add_weight(shape=(self.units,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.kernel_lr_multiplier]
        self.bias = None

    self.built = True
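The Clip constraint used in build() is not defined in this snippet; a minimal sketch consistent with the Clip(-H, H) call, clamping weights element-wise after each update, could look like this.

from keras import backend as K
from keras.constraints import Constraint

class Clip(Constraint):
    """Clamp weights to [min_value, max_value] after each optimizer update."""
    def __init__(self, min_value, max_value=None):
        self.min_value = min_value
        # If only one bound is given, mirror it, matching the Clip(-H, H) usage.
        self.max_value = max_value if max_value is not None else -min_value

    def __call__(self, w):
        return K.clip(w, self.min_value, self.max_value)

    def get_config(self):
        return {'min_value': self.min_value, 'max_value': self.max_value}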
def build(self, input_shape):
    init = initializers.RandomUniform(minval=-50, maxval=50, seed=None)
    self.kernel = self.add_weight(name='kernel',
                                  shape=(self.height, self.width, 3),
                                  initializer=init,
                                  trainable=True)
    super(InputReflect, self).build(input_shape)
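A hypothetical way to wire up InputReflect: since its only weights are a trainable (height, width, 3) tensor, the "image" itself is what gets optimized. The constructor signature and the assumption that call() returns the kernel are guesses, as only build() is shown above.

# Hypothetical usage (constructor arguments are assumptions; only build() is shown above).
inputs = Input(shape=(1,))
image = InputReflect(height=224, width=224)(inputs)  # trainable pixels, assuming call() emits the kernel
model = Model(inputs, image)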
def __init__(self):
    self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 100
    self.LEARNING_RATE = 0.01
    self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
    self.REGULARIZER = regularizers.l2(0.001)
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': self.INITIALIZER,
        'recurrent_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'recurrent_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 10
    self.BATCH_SIZE = 200
    self.EPOCHS = 100

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
    self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, \
    self.polarities_matrix, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(type=self.DATASET, mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('lstm_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('lstm_saved_model.h5')
    else:
        print('Build model...')
        inputs = Input(shape=(self.MAX_SEQUENCE_LENGTH,))
        x = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                      output_dim=self.EMBEDDING_DIM,
                      input_length=self.MAX_SEQUENCE_LENGTH,
                      weights=[self.embedding_matrix],
                      trainable=False)(inputs)
        x = LSTM(**self.LSTM_PARAMS)(x)
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs, predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc', f1])
        # plot_model(model, to_file='model.png')
        self.model = model
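The f1 metric passed to compile() above (and in the snippets below) is not defined in any of these snippets. A common Keras batch-wise implementation, an assumption about what the original used rather than its actual code, looks like this; note it approximates F1 per batch, not over the whole epoch.

from keras import backend as K

def f1(y_true, y_pred):
    # Batch-wise F1 from rounded predictions; epsilon guards against division by zero.
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    tp = K.sum(K.round(K.clip(y_true * y_pred_pos, 0, 1)))
    precision = tp / (K.sum(y_pred_pos) + K.epsilon())
    recall = tp / (K.sum(K.round(K.clip(y_true, 0, 1))) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())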
def get_initializers(self, params):
    default_params = {'bias_param': 0,
                      'reg': {'l1': 0, 'l2': 0},
                      'group_l': {'l1': 0, 'l2': 0}}
    backends_li = ['keras', 'pytorch']
    dist_dict = {'normal': {'mean': 0, 'stddev': 1},
                 'uniform': {'minval': 0, 'maxval': 0}}
    # Prepare a nested default config dict, one subtree per backend/distribution.
    for backend in backends_li:
        for dist, pars in dist_dict.items():
            deep_set(default_params, ['backend', backend, 'distrib', dist], pars,
                     accessor=lambda d, k: d.setdefault(k, dict()))
    self.default_backend_dist_params = default_params

    for key, vals in params.items():
        sub_dict = default_params.copy()
        if key not in ['hyper_params', 'model_nas_params']:
            params[key].update({'bias_param': sub_dict['bias_param']
                                if 'bias_param' not in vals.keys()
                                else params[key]['bias_param']})
            params[key].update({'bias': keras.initializers.Constant(value=params[key]['bias_param'])})
            if 'regularizers' not in vals.keys():  # add default regularization
                params[key].update({'regularizers': sub_dict['reg']})
            if 'group_lasso' not in vals.keys():  # add default group lasso
                params[key].update({'group_lasso': sub_dict['group_l']})
            if 'kernel_params' in vals.keys():
                custom_params = vals['kernel_params']
                # The original tested the literal 'backend' == 'keras', which is
                # always False; the intended check is against the configured value.
                if 'backend' not in custom_params or custom_params['backend'] == 'keras':
                    rel_li = (['backend', 'keras', 'distrib', custom_params['distrib']]
                              if 'distrib' in custom_params
                              else ['backend', 'keras', 'distrib', 'normal'])
                else:  # non-keras backend
                    if custom_params['backend'] not in self.default_backend_dist_params['backend'].keys():
                        raise ValueError('Backend: {} and its distributions are not yet '
                                         'defined in Generalised Model'.format(custom_params['backend']))
                    rel_li = ['backend', custom_params['backend'], 'distrib', custom_params['distrib']]
                rel_dict = deep_get(sub_dict, rel_li).copy()
                rel_dict.update(custom_params)
                params[key].update({'kernel': initializers.RandomNormal(mean=rel_dict['mean'],
                                                                        stddev=rel_dict['stddev'])
                                    if 'normal' in rel_li
                                    else initializers.RandomUniform(minval=rel_dict['minval'],
                                                                    maxval=rel_dict['maxval'])})
    self._model_params.update(params)
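deep_get and deep_set are external helpers here; minimal sketches consistent with how they are called above (path-based access into nested dicts, with an accessor that creates intermediate levels) might be:

from functools import reduce

def deep_get(dictionary, keys):
    # Walk a list of keys into a nested dict: deep_get(d, ['a', 'b']) -> d['a']['b']
    return reduce(lambda d, k: d[k], keys, dictionary)

def deep_set(dictionary, keys, value, accessor=lambda d, k: d.setdefault(k, dict())):
    # Create intermediate dicts as needed, then assign the value at the final key.
    for key in keys[:-1]:
        dictionary = accessor(dictionary, key)
    dictionary[keys[-1]] = value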
def build(self, input_shape):
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')

    input_dim = input_shape[channel_axis]
    kernel_shape = self.kernel_size + (input_dim, self.filters)

    base = self.kernel_size[0] * self.kernel_size[1]
    if self.H == 'Glorot':
        nb_input = int(input_dim * base)
        nb_output = int(self.filters * base)
        self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output)))
        # print('Glorot H: {}'.format(self.H))
    if self.kernel_lr_multiplier == 'Glorot':
        nb_input = int(input_dim * base)
        nb_output = int(self.filters * base)
        self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (nb_input + nb_output)))
        # print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier))

    self.kernel_constraint = Clip(-self.H, self.H)
    self.kernel_initializer = initializers.RandomUniform(-self.H, self.H)
    self.kernel = self.add_weight(shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)

    if self.use_bias:
        self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier]
        # The original passed (self.output_dim,) and self.bias_initializers,
        # neither of which exists on this layer; a conv bias has one entry per
        # filter and uses the singular bias_initializer.
        self.bias = self.add_weight(shape=(self.filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.kernel_lr_multiplier]
        self.bias = None

    self.built = True
# Assumes x is a (10, 2) float array defined earlier in the script (y below has
# ten targets and the model takes two features); x1/x2 are split out but unused.
x1 = []
x2 = []
for i in x:
    x1.append(i[0])
    x2.append(i[1])
y = np.array([7.8, 8.6, 8.7, 7.9, 8.4, 8.9, 10.4, 11.6, 13.9, 15.8])
print(x.shape)
print(y.shape)

with tf.device('/cpu:0'):
    inputs = layers.Input(shape=(2,))
    out = layers.Dense(1, use_bias=False,
                       kernel_initializer=initializers.RandomUniform())(inputs)
    model = Model(inputs=inputs, outputs=out)

    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(3).repeat()
    # print(dataset.take(1))

    # tf.train.GradientDescentOptimizer is the TF1 optimizer API.
    model.compile(optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.0001),
                  loss='mean_squared_error')

    print_weights = callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: print(model.layers[1].get_weights()))

    model.summary()
    model.fit(dataset, epochs=20, steps_per_epoch=1, callbacks=[print_weights])
def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100,
             learning_rate=0.01, n_class=3, max_sentence_len=40,
             l2_reg_val=0.003):
    ############################
    self.DATASET = ['twitter', 'restaurant']
    # 1001-twitter, 1002-restaurant, 1003-laptop, 1004-others, 1005-general
    self.TASK_INDICES = [1002, 1003, 1005]
    self.LOSS_WEIGHTS = {1002: 0.5, 1003: 0.5, 1005: 0.5}
    self.MODEL_TO_LOAD = './models/mtl_absa_saved_model.h5'
    ###########################
    self.EMBEDDING_DIM = embedding_dim
    self.BATCH_SIZE = batch_size
    self.N_HIDDEN = n_hidden
    self.LEARNING_RATE = learning_rate
    self.N_CLASS = n_class
    self.MAX_SENTENCE_LENGTH = max_sentence_len
    self.EPOCHS = 4
    self.L2_REG_VAL = l2_reg_val
    self.MAX_ASPECT_LENGTH = 5
    self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
    self.REGULARIZER = regularizers.l2(self.L2_REG_VAL)
    self.LSTM_PARAMS = {
        'units': self.N_HIDDEN,
        'activation': 'tanh',
        'recurrent_activation': 'hard_sigmoid',
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.DENSE_PARAMS = {
        'kernel_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dtype': 'float32',
    }

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
    self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, self.dataset_index, \
    self.polarities_matrix, self.polarities, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(types=self.DATASET, mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SENTENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    print('Build model...')
    inputs_l = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64', name='input_l')
    inputs_r = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64', name='input_r')
    input_dataset = Input(shape=(1,), dtype='float32', name='input_dataset')
    Embedding_Layer = Embedding(input_dim=len(self.embedding_matrix),
                                output_dim=self.EMBEDDING_DIM,
                                input_length=self.MAX_SENTENCE_LENGTH,
                                mask_zero=True,
                                weights=[self.embedding_matrix],
                                trainable=False)
    x_l = Embedding_Layer(inputs_l)
    x_r = Embedding_Layer(inputs_r)
    x_l = LSTM(name='sentence_left', **self.LSTM_PARAMS)(x_l)
    x_r = LSTM(go_backwards=True, name='sentence_right', **self.LSTM_PARAMS)(x_r)
    x = Concatenate(name='last_shared')([x_l, x_r])

    # twitter task layers
    tw_x = Dense(self.N_HIDDEN, name='t1_dense_10', **self.DENSE_PARAMS)(x)
    twitter_x = Dense(self.N_CLASS, name='t1_dense_3', **self.DENSE_PARAMS)(tw_x)
    twitter_x = Concatenate(name='twitter_output')([twitter_x, input_dataset])

    # restaurant task layers
    rest_x = Dense(self.N_HIDDEN, name='t2_dense_10', **self.DENSE_PARAMS)(x)
    rest_x = Dense(self.N_CLASS, name='t2_dense_3', **self.DENSE_PARAMS)(rest_x)
    rest_x = Concatenate(name='rest_output')([rest_x, input_dataset])

    # general task layers
    general_x = Dense(self.N_HIDDEN, name='t3_dense_10', **self.DENSE_PARAMS)(x)
    general_x = Dense(self.N_CLASS, name='t3_dense_3', **self.DENSE_PARAMS)(general_x)
    general_x = Concatenate(name='general_output')([general_x, input_dataset])

    model = Model(inputs=[inputs_l, inputs_r, input_dataset],
                  outputs=[twitter_x, rest_x, general_x])
    # model.summary()
    # dictionary = {v.name: i for i, v in enumerate(model.layers)}
    # print(dictionary)

    if os.path.exists(self.MODEL_TO_LOAD):
        print('loading saved model...')
        model.load_weights(self.MODEL_TO_LOAD)

    self.model = model
    self.model.compile(loss={'twitter_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[0]),
                             'rest_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[1]),
                             'general_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[2])},
                       optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                       metrics=[multitask_accuracy, f1])
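multitask_loss and multitask_accuracy are external to this snippet. Given that every output head concatenates its class scores with the dataset index, one plausible reconstruction of the loss closure, an assumption rather than the original code, masks each head's cross-entropy to the examples belonging to its task and scales it by the task weight:

import keras.backend as K

def multitask_loss(loss_weights, task_index):
    # Assumed reconstruction: y_pred's last column is the dataset index appended
    # by the Concatenate layer; y_true is assumed padded to the same width.
    def loss(y_true, y_pred):
        mask = K.cast(K.equal(y_pred[:, -1], float(task_index)), K.floatx())
        ce = K.categorical_crossentropy(y_true[:, :-1], K.softmax(y_pred[:, :-1]))
        return loss_weights[task_index] * K.sum(mask * ce) / (K.sum(mask) + K.epsilon())
    return loss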
def __init__(self):
    self.HOPS = 5
    self.DATASET = 'twitter'  # 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 200
    self.LEARNING_RATE = 0.01
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'recurrent_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'bias_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'kernel_regularizer': regularizers.l2(0.001),
        'recurrent_regularizer': regularizers.l2(0.001),
        'bias_regularizer': regularizers.l2(0.001),
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 40
    self.MAX_ASPECT_LENGTH = 2
    self.ITERATION = 500
    self.BATCH_SIZE = 200

    self.texts_raw_indices, self.texts_left_indices, self.aspects_indices, self.texts_right_indices, \
    self.polarities_matrix, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(type=self.DATASET, mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('ram_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('ram_saved_model.h5')
    else:
        print('Build model...')
        inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH,),
                                name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), name='inputs_aspect')
        sentence = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                             output_dim=self.EMBEDDING_DIM,
                             input_length=self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH,
                             weights=[self.embedding_matrix],
                             trainable=False,
                             name='sentence_embedding')(inputs_sentence)
        aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)
        memory = Bidirectional(LSTM(**self.LSTM_PARAMS, return_sequences=True), name='memory')(sentence)
        aspect = Bidirectional(LSTM(**self.LSTM_PARAMS, return_sequences=True), name='aspect')(aspect)
        x = Lambda(lambda xin: K.mean(xin, axis=1), name='aspect_mean')(aspect)
        SharedAttention = Attention(name='shared_attention')
        for i in range(self.HOPS):
            x = SharedAttention((memory, x))
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_sentence, inputs_aspect], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc'])
        # plot_model(model, to_file='model.png')
        self.model = model
def __init__(self):
    self.HOPS = 3
    self.SCORE_FUNCTION = 'mlp'  # scaled_dot_product / mlp (concat) / bi_linear (general dot)
    self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 300
    self.LEARNING_RATE = 0.001
    self.INITIALIZER = initializers.RandomUniform(minval=-0.05, maxval=0.05)
    self.REGULARIZER = regularizers.l2(0.001)
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': self.INITIALIZER,
        'recurrent_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'recurrent_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 10
    self.BATCH_SIZE = 32
    self.EPOCHS = 5

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
    self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, \
    self.polarities_matrix, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(type=self.DATASET, mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('ram_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('ram_saved_model.h5')
    else:
        print('Build model...')
        inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH,), name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), name='inputs_aspect')
        # Number of real (non-padding) aspect tokens; tf.count_nonzero is the TF1 name.
        nonzero_count = Lambda(lambda xin: tf.count_nonzero(xin, dtype=tf.float32))(inputs_aspect)
        sentence = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                             output_dim=self.EMBEDDING_DIM,
                             input_length=self.MAX_SEQUENCE_LENGTH,
                             mask_zero=True,
                             weights=[self.embedding_matrix],
                             trainable=False,
                             name='sentence_embedding')(inputs_sentence)
        aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           mask_zero=True,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)
        memory = Bidirectional(LSTM(return_sequences=True, **self.LSTM_PARAMS), name='memory')(sentence)
        aspect = Bidirectional(LSTM(return_sequences=True, **self.LSTM_PARAMS), name='aspect')(aspect)
        x = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1], name='aspect_mean')([aspect, nonzero_count])
        shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                     initializer=self.INITIALIZER,
                                     regularizer=self.REGULARIZER,
                                     name='shared_attention')
        for i in range(self.HOPS):
            x = shared_attention((memory, x))
        x = Flatten()(x)
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_sentence, inputs_aspect], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc', f1])
        # plot_model(model, to_file='model.png')
        self.model = model
def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100,
             learning_rate=0.005, n_class=3, max_sentence_len=40,
             l2_reg_val=0.003):
    ############################
    self.DATASET = ['laptop']
    self.MODEL_TO_LOAD = './models/mtl_absa_saved_model.h5'
    ###########################
    self.EMBEDDING_DIM = embedding_dim
    self.BATCH_SIZE = batch_size
    self.N_HIDDEN = n_hidden
    self.LEARNING_RATE = learning_rate
    self.N_CLASS = n_class
    self.MAX_SENTENCE_LENGTH = max_sentence_len
    self.EPOCHS = 4
    self.L2_REG_VAL = l2_reg_val
    self.MAX_ASPECT_LENGTH = 5
    self.INITIALIZER = initializers.RandomUniform(minval=-0.002, maxval=0.002)
    self.REGULARIZER = regularizers.l2(self.L2_REG_VAL)
    self.LSTM_PARAMS = {
        'units': self.N_HIDDEN,
        'activation': 'tanh',
        'recurrent_activation': 'hard_sigmoid',
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.DENSE_PARAMS = {
        'kernel_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dtype': 'float32',
    }

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
    self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, self.dataset_index, \
    self.polarities_matrix, self.polarities, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(types=self.DATASET, mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SENTENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    print('Build model...')
    inputs_l = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
    inputs_r = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
    # input_dataset = Input(shape=(1,), dtype='float32')
    Embedding_Layer = Embedding(input_dim=len(self.embedding_matrix),
                                output_dim=self.EMBEDDING_DIM,
                                input_length=self.MAX_SENTENCE_LENGTH,
                                mask_zero=True,
                                weights=[self.embedding_matrix],
                                trainable=False)
    x_l = Embedding_Layer(inputs_l)
    x_r = Embedding_Layer(inputs_r)
    x_l = LSTM(name='sentence_left', **self.LSTM_PARAMS)(x_l)
    x_r = LSTM(go_backwards=True, name='sentence_right', **self.LSTM_PARAMS)(x_r)
    x = Concatenate(name='last_shared')([x_l, x_r])
    # x = Dense(self.N_HIDDEN, name='t1_dense_10', **self.DENSE_PARAMS)(x)
    output = Dense(self.N_CLASS, name='t1_dense_3', **self.DENSE_PARAMS)(x)

    model = Model(inputs=[inputs_l, inputs_r], outputs=output)
    model.summary()

    # Warm-start the shared LSTM branches from saved multi-task weights.
    weightsToLoad_3 = np.load('./weights/mtl_absa_saved_model_3_2019_09_27_17_00.pkl.npy',
                              allow_pickle=True)
    weightsToLoad_4 = np.load('./weights/mtl_absa_saved_model_4_2019_09_27_17_00.pkl.npy',
                              allow_pickle=True)
    # weightsToLoad_6 = np.load('inits.pkl.npy', allow_pickle=True)
    model.layers[3].set_weights(weightsToLoad_3)
    model.layers[4].set_weights(weightsToLoad_4)
    # model.layers[6].set_weights(weightsToLoad_6)

    if os.path.exists(self.MODEL_TO_LOAD):
        print('loading saved model...')
        model.load_weights(self.MODEL_TO_LOAD)

    self.model = model
    self.model.compile(loss='categorical_crossentropy',
                       optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                       metrics=['acc', f1])