def keras_loss(y_true, y_pred):
    """Return the negated correlation between the two halves of ``y_pred``.

    ``y_pred`` is split along axis 1 into two equally wide views. Each view is
    mean-centred, its regularised covariance is inverted through an
    eigendecomposition, and the correlation is derived from
    ``T = S11^(-1/2) . S12 . S22^(-1/2)``.

    ``k_singular_values`` and ``representation_size`` are not parameters; they
    are looked up in the enclosing scope.

    Args:
        y_true: Not used by the computation.
        y_pred: Rank-2 tensor holding both views side by side on axis 1.

    Returns:
        Scalar tensor equal to minus the correlation, so that minimising the
        loss maximises the correlation.
    """
    regularization_constant_1 = regularization_constant_2 = 1e-4
    epsilon = 1e-12

    o1 = o2 = int(y_pred.shape[1] // 2)

    def _inverse_sqrt(sigma):
        # Inverse matrix square root built from the eigenpairs whose
        # eigenvalue exceeds epsilon.
        w, v = tf.self_adjoint_eig(sigma)
        keep = tf.reshape(tf.where(tf.greater(w, epsilon)), [-1])
        w = tf.gather(w, keep)
        # Eigenvectors are the COLUMNS of v, so the selection has to happen
        # along axis 1 (gathering rows would drop coordinates instead).
        v = tf.gather(v, keep, axis=1)
        return tf.matmul(tf.matmul(v, tf.diag(tf.divide(1, tf.sqrt(w)))),
                         tf.transpose(v))

    # Work on transposed views: axis 0 indexes features, axis 1 indexes rows
    # of y_pred.
    h_1 = tf.transpose(y_pred[:, 0:o1])
    h_2 = tf.transpose(y_pred[:, o1:o1 + o2])

    m = tf.shape(h_1)[1]
    ones = tf.ones(shape=(m, m))
    mean_scale = tf.cast(tf.divide(1, m), tf.float32)
    cov_scale = tf.cast(tf.divide(1, m - 1), tf.float32)

    centered_h_1 = h_1 - mean_scale * tf.matmul(h_1, ones)
    centered_h_2 = h_2 - mean_scale * tf.matmul(h_2, ones)

    sigma_hat_12 = cov_scale * tf.matmul(centered_h_1,
                                         tf.transpose(centered_h_2))
    sigma_hat_11 = (cov_scale * tf.matmul(centered_h_1,
                                          tf.transpose(centered_h_1))
                    + regularization_constant_1 * tf.eye(num_rows=o1))
    sigma_hat_22 = (cov_scale * tf.matmul(centered_h_2,
                                          tf.transpose(centered_h_2))
                    + regularization_constant_2 * tf.eye(num_rows=o2))

    sigma_hat_rootinvert_11 = _inverse_sqrt(sigma_hat_11)
    sigma_hat_rootinvert_22 = _inverse_sqrt(sigma_hat_22)

    t_matrix = tf.matmul(tf.matmul(sigma_hat_rootinvert_11, sigma_hat_12),
                         sigma_hat_rootinvert_22)

    if k_singular_values == representation_size:
        # Use all components: sqrt(trace(T^T T)).
        correlation = tf.sqrt(
            tf.trace(tf.matmul(tf.transpose(t_matrix), t_matrix)))
    else:
        # Eigenvalues of T^T T form a rank-1 tensor; keep the positive ones
        # and sum the square roots of the k_singular_values largest.
        # NOTE(review): the previous code indexed this rank-1 tensor with
        # ``w[:, 2]``; "top k_singular_values" is the assumed intent.
        w, _ = tf.self_adjoint_eig(
            tf.matmul(t_matrix, t_matrix, transpose_a=True))
        w = tf.gather(w, tf.reshape(tf.where(tf.greater(w, epsilon)), [-1]))
        # Never ask top_k for more entries than survived the threshold.
        k = tf.minimum(k_singular_values, tf.shape(w)[0])
        correlation = tf.reduce_sum(tf.sqrt(tf.nn.top_k(w, k=k).values))

    return -correlation
def build(self, input_shapes):
    """Create the context and memory projection weights.

    Args:
        input_shapes: Sequence of input shapes. Only the first entry is read
            and it must have exactly three dimensions; its axes 0, 1 and 2
            are stored/used as ``input_batch``, ``input_num`` and the input
            feature size respectively.
    """
    input_shape = input_shapes[0]
    assert len(input_shape) == 3
    input_dim = input_shape[2]
    self.input_batch = input_shape[0]
    self.input_num = input_shape[1]

    # Every variable gets a distinct name. Previously W_m reused the W_c name
    # and both biases shared '<layer>_b', so the four weights could not be
    # told apart by name.
    self.W_c = self.init((input_dim, self.output_dim),
                         name='{}_W_c'.format(self.name))
    self.b_c = K.zeros((self.output_dim,),
                       name='{}_b_c'.format(self.name))
    self.W_m = self.init((input_dim, self.mem_vector_dim),
                         name='{}_W_m'.format(self.name))
    self.b_m = K.zeros((self.mem_vector_dim,),
                       name='{}_b_m'.format(self.name))

    # Trainable parameters.
    self.trainable_weights = [self.W_c, self.W_m, self.b_c, self.b_m]
def build(self, input_shapes):
    """Allocate the weights of the context and memory projections.

    Args:
        input_shapes: Sequence of input shapes; ``input_shapes[0]`` must be a
            3-element shape. Its first two entries are kept on the instance
            as ``input_batch`` and ``input_num``; the third is the input
            feature size of both projections.
    """
    input_shape = input_shapes[0]
    assert len(input_shape) == 3
    input_dim = input_shape[2]
    self.input_batch = input_shape[0]
    self.input_num = input_shape[1]

    prefix = self.name
    # Names are unique per variable: the memory kernel used to be named
    # '<layer>_W_c' like the context kernel, and both biases '<layer>_b'.
    self.W_c = self.init((input_dim, self.output_dim),
                         name='{}_W_c'.format(prefix))
    self.b_c = K.zeros((self.output_dim, ),
                       name='{}_b_c'.format(prefix))
    self.W_m = self.init((input_dim, self.mem_vector_dim),
                         name='{}_W_m'.format(prefix))
    self.b_m = K.zeros((self.mem_vector_dim, ),
                       name='{}_b_m'.format(prefix))

    # Trainable parameters.
    self.trainable_weights = [self.W_c, self.W_m, self.b_c, self.b_m]
def get_constants(self, x):
    """Return the parent's constants plus the projected attention vector.

    Per the original note, this method is called by the parent LSTM and
    defines values that live outside the step function, so they are not
    recomputed for every element of the sequence.
    """
    step_constants = super(AttentionLSTM, self).get_constants(x)
    attention_term = K.dot(self.attention_vec, self.U_m) + self.b_m
    step_constants.append(attention_term)
    return step_constants
def solver_eval(self, y_true, y_pred):
    """Score predictions against targets and return a concrete value.

    The metric depends on ``self.output_dim`` and ``self.entrophy``:

    * ``output_dim == 1``: mean squared error.
    * ``output_dim == 2``: mean binary cross-entropy if ``self.entrophy`` is
      truthy, otherwise ``1 - binary accuracy``.
    * otherwise: mean categorical cross-entropy if ``self.entrophy`` is
      truthy, otherwise ``1 - categorical accuracy``.

    Args:
        y_true: Target values, convertible to a float32 tensor.
        y_pred: Predicted values, convertible to a float32 tensor.

    Returns:
        The evaluated metric (lower is better in every branch).
    """
    def _evaluate(tensor):
        # The context manager closes the session once the value has been
        # fetched; a bare tf.Session() per call was never released.
        with tf.Session() as session:
            return tensor.eval(session=session)

    # Scalar and binary outputs are flattened; multi-class outputs keep one
    # row per sample.
    if self.output_dim in (1, 2):
        target_shape = [-1]
    else:
        target_shape = [-1, self.output_dim]
    y_pred = tf.reshape(tf.convert_to_tensor(y_pred, np.float32),
                        target_shape)
    y_true = tf.reshape(tf.convert_to_tensor(y_true, np.float32),
                        target_shape)

    if self.output_dim == 1:
        return _evaluate(
            K.mean(keras.losses.mean_squared_error(y_true, y_pred)))

    if self.output_dim == 2:
        if self.entrophy:
            return _evaluate(
                K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1))
        return 1 - _evaluate(
            K.mean(keras.metrics.binary_accuracy(y_true, y_pred)))

    if self.entrophy:
        return _evaluate(
            K.mean(K.categorical_crossentropy(y_true, y_pred), axis=-1))
    return 1 - _evaluate(
        K.mean(keras.metrics.categorical_accuracy(y_true, y_pred)))
def get_constants(self, inputs):
    """Return the parent's constants plus the projected attention vector.

    ``inputs[0]`` is handed to the parent implementation and ``inputs[1]`` is
    the attention vector. Per the original note, this method is called by the
    parent LSTM and defines values that live outside the step function, so
    they are not recomputed for every element of the sequence.
    """
    sequence_input, attention_input = inputs[0], inputs[1]
    step_constants = super(AttentionLSTM, self).get_constants(sequence_input)
    attention_term = K.dot(attention_input, self.U_m) + self.b_m
    step_constants.append(attention_term)
    return step_constants
def cosine_error(x):
    """Negative log of a softmax over scaled cosine similarities.

    ``x`` is indexed from 0 to 6. ``x[0]`` is compared against each of
    ``x[1]`` .. ``x[6]`` with ``cosine``; the similarities are scaled by 5 and
    exponentiated, and the share belonging to ``x[1]`` is the probability
    whose negative log is returned.

    Per the original comments each ``x[i]`` has shape
    (batch_size, input_dim) and the result has shape (batch_size, 1).
    """
    delta = 5
    reference = x[0]

    # exp(delta * cos(x[0], x[i])) for i = 1..6, in that order.
    exp_terms = [K.exp(cosine(reference, x[i]) * delta) for i in range(1, 7)]

    total = exp_terms[0]
    for term in exp_terms[1:]:
        total = total + term

    p = exp_terms[0] / total  # softmax probability of x[1]
    f = -K.log(p)  # objective: -log p, one value per sample
    return K.reshape(f, (K.shape(p)[0], 1))
def build(self, input_shapes):
    """Initialise the weight matrices.

    U_a: weights from x to the attention output.
    U_m: weights from attention_vec to the attention output.
    U_s: weights from the attention output to the softmax output.

    ``input_shapes[0]`` is the shape passed to the parent ``build``;
    ``input_shapes[1]`` is only recorded in ``input_spec``.
    """
    sequence_shape = input_shapes[0]
    super(AttentionLSTM, self).build(sequence_shape)
    self.input_spec = [
        InputSpec(shape=shape)
        for shape in (input_shapes[0], input_shapes[1])
    ]

    # The attention width comes from the layer configuration rather than
    # from input_spec[1].
    attention_dim = self.att_dim
    input_dim = sequence_shape[2]
    units = self.output_dim
    prefix = self.name

    # Attention parameters.
    self.U_a = self.inner_init((input_dim, units),
                               name='{}_U_a'.format(prefix))
    self.b_a = K.zeros((units, ), name='{}_b_a'.format(prefix))

    self.U_m = self.inner_init((attention_dim, units),
                               name='{}_U_m'.format(prefix))
    self.b_m = K.zeros((units, ), name='{}_b_m'.format(prefix))

    # A single attention parameter yields one score per sample; otherwise
    # one score per output unit.
    score_dim = 1 if self.single_attention_param else units
    self.U_s = self.inner_init((units, score_dim),
                               name='{}_U_s'.format(prefix))
    self.b_s = K.zeros((score_dim, ), name='{}_b_s'.format(prefix))

    self.trainable_weights += [
        self.U_a, self.U_m, self.U_s, self.b_a, self.b_m, self.b_s
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def step(self, x, states):
    """Run the parent step, then gate its hidden state with attention.

    Per the original note, this method is called by the parent RNN and
    defines how each input propagates through the network; ``states[4]``
    holds the attention_vec contribution to the attention layer.

    Returns:
        ``(h, [h, c])`` with ``h`` being the gated hidden state.
    """
    _, (h, c) = super(AttentionLSTM, self).step(x, states)
    attention = states[4]

    pre_activation = K.dot(h, self.U_a) * attention + self.b_a
    m = self.attn_inner_activation(pre_activation)
    # Intuitively it makes more sense to use a sigmoid (was getting some NaN
    # problems which I think might have been caused by the exponential
    # function -> gradients blow up).
    s = self.attn_activation(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        gate = K.repeat_elements(s, self.output_dim, axis=1)
    else:
        gate = s
    h = h * gate

    return h, [h, c]
def build(self, input_shape):
    """Initialise the weight matrices.

    U_a: weights from the LSTM output to the attention output.
    U_m: weights from attention_vec to the attention output.
    U_s: weights from the attention output to the softmax output.

    Raises:
        Exception: if ``self.attention_vec`` has no ``_keras_shape``.
    """
    super(AttentionLSTM, self).build(input_shape)

    if not hasattr(self.attention_vec, '_keras_shape'):
        raise Exception(
            'Layer could not be build: No information about expected input shape.'
        )
    attention_dim = self.attention_vec._keras_shape[1]

    units = self.output_dim
    prefix = self.name

    # Attention parameters.
    self.U_a = self.inner_init((units, units),
                               name='{}_U_a'.format(prefix))
    self.b_a = K.zeros((units, ), name='{}_b_a'.format(prefix))

    self.U_m = self.inner_init((attention_dim, units),
                               name='{}_U_m'.format(prefix))
    self.b_m = K.zeros((units, ), name='{}_b_m'.format(prefix))

    # A single attention parameter yields one score per sample; otherwise
    # one score per output unit.
    score_dim = 1 if self.single_attention_param else units
    self.U_s = self.inner_init((units, score_dim),
                               name='{}_U_s'.format(prefix))
    self.b_s = K.zeros((score_dim, ), name='{}_b_s'.format(prefix))

    self.trainable_weights += [
        self.U_a, self.U_m, self.U_s, self.b_a, self.b_m, self.b_s
    ]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def attention_3d_block(inputs, input_dim, is_single_attention_vector=False):
    """Multiply ``inputs`` element-wise by softmax attention weights.

    Per the original comment, ``inputs`` has shape
    (batch_size, time_steps, input_dim).

    Args:
        inputs: Rank-3 tensor; its last axis length is read as the feature
            count.
        input_dim: Number of units of the softmax ``Dense`` layer applied
            after swapping the last two axes.
        is_single_attention_vector: When true, the weights are averaged over
            axis 1 and repeated once per feature.

    Returns:
        The tensor produced by the ``'attention_mul'`` merge layer.
    """
    feature_length = int(inputs.shape[2])

    scores = Permute((2, 1))(inputs)
    # A Reshape((input_dim, time_steps)) here would be a no-op; it only
    # documents which dimension is which.
    scores = Dense(input_dim, activation='softmax')(scores)

    if is_single_attention_vector:
        scores = Lambda(lambda x: K.mean(x, axis=1),
                        name='dim_reduction')(scores)
        scores = RepeatVector(feature_length)(scores)

    a_probs = Permute((2, 1), name='attention_vec')(scores)
    return merge([inputs, a_probs], name='attention_mul', mode='mul')
def attention_3d_block(inputs, time_steps, single_attention_vector=True):
    """Multiply ``inputs`` element-wise by softmax attention weights.

    Per the original comment, ``inputs`` has shape
    (batch_size, time_steps, input_dim).

    Args:
        inputs: Rank-3 tensor; its last axis length is read as ``input_dim``.
        time_steps: Length of the time axis; also the number of units of the
            softmax ``Dense`` layer.
        single_attention_vector: When true, the weights are averaged over
            axis 1 and repeated ``input_dim`` times.

    Returns:
        The output of ``Multiply()([inputs, a_probs])``.
    """
    input_dim = int(inputs.shape[2])

    weights = Permute((2, 1))(inputs)
    # Not functionally needed; it makes explicit which dimension is which.
    weights = Reshape((input_dim, time_steps))(weights)
    weights = Dense(time_steps, activation='softmax')(weights)

    if single_attention_vector:
        weights = Lambda(lambda x: K.mean(x, axis=1),
                         name='dim_reduction')(weights)
        weights = RepeatVector(input_dim)(weights)

    a_probs = Permute((2, 1), name='attention_vec')(weights)
    return Multiply()([inputs, a_probs])
def build(self, input_shapes):
    """Initialise the weight matrices.

    U_a: weights from x to the attention output.
    U_m: weights from attention_vec to the attention output.
    U_s: weights from the attention output to the softmax output.

    ``input_shapes[0]`` is forwarded to the parent ``build``; both entries
    are recorded in ``input_spec``.
    """
    lstm_input_shape = input_shapes[0]
    super(AttentionLSTM, self).build(lstm_input_shape)
    self.input_spec = [InputSpec(shape=input_shapes[0]),
                       InputSpec(shape=input_shapes[1])]

    # Attention width is taken from the layer configuration, not from
    # input_spec[1].
    attention_dim = self.att_dim
    input_dim = lstm_input_shape[2]

    def _weight_name(suffix):
        # '<layer name>_<suffix>'
        return '{}_{}'.format(self.name, suffix)

    # Attention parameters.
    self.U_a = self.inner_init((input_dim, self.output_dim),
                               name=_weight_name('U_a'))
    self.b_a = K.zeros((self.output_dim,), name=_weight_name('b_a'))

    self.U_m = self.inner_init((attention_dim, self.output_dim),
                               name=_weight_name('U_m'))
    self.b_m = K.zeros((self.output_dim,), name=_weight_name('b_m'))

    if self.single_attention_param:
        score_width = 1
    else:
        score_width = self.output_dim
    self.U_s = self.inner_init((self.output_dim, score_width),
                               name=_weight_name('U_s'))
    self.b_s = K.zeros((score_width,), name=_weight_name('b_s'))

    self.trainable_weights += [self.U_a, self.U_m, self.U_s,
                               self.b_a, self.b_m, self.b_s]

    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
def LSTNet(trainX1, trainX2, trainY, config):
    """Build a two-input Conv1D + LSTM model with a linear "highway" branch.

    Each input goes through its own ``Conv1D`` (48 filters, kernel size 6),
    a ``CuDNNLSTM(64)`` and ``Dropout(config.dropout)``. The two results are
    concatenated and projected to ``trainY.shape[1]`` outputs. A second
    ``Dense`` projection of the last ``config.highway_window`` steps of
    ``input1`` is added to that output before a sigmoid activation.

    Args:
        trainX1: Array whose ``shape[1:3]`` defines the first input shape.
        trainX2: Array whose ``shape[1:3]`` defines the second input shape.
        trainY: Array whose ``shape[1]`` is the number of outputs.
        config: Object providing ``dropout`` and ``highway_window``.

    Returns:
        The (uncompiled) ``Model`` taking ``[input1, input2]``.
    """
    input1 = Input(shape=(trainX1.shape[1], trainX1.shape[2]))
    conv1 = Conv1D(filters=48, kernel_size=6, strides=1,
                   activation='relu')  # for input1
    conv2 = Conv1D(filters=48, kernel_size=6, strides=1,
                   activation='relu')  # for input2
    conv1out = conv1(input1)
    lstm1out = CuDNNLSTM(64)(conv1out)
    lstm1out = Dropout(config.dropout)(lstm1out)

    input2 = Input(shape=(trainX2.shape[1], trainX2.shape[2]))
    conv2out = conv2(input2)
    # Start both convolutions from the same weights. This has to happen
    # after both layers have been applied to an input: before that they own
    # no weights, so copying earlier transfers nothing. The kernels only
    # have the same shape when both inputs have the same number of channels.
    if trainX1.shape[2] == trainX2.shape[2]:
        conv2.set_weights(conv1.get_weights())
    lstm2out = CuDNNLSTM(64)(conv2out)
    lstm2out = Dropout(config.dropout)(lstm2out)

    lstm_out = concatenate([lstm1out, lstm2out])
    output = Dense(trainY.shape[1])(lstm_out)

    # Highway: a Dense layer emulating an autoregressive (AR) process. It
    # adds a linear component to the prediction so the output can follow
    # changes in the scale of the input.
    highway_window = config.highway_window
    # Keep the last `highway_window` time steps with all input dimensions.
    z = Lambda(lambda k: k[:, -highway_window:, :])(input1)
    z = Lambda(lambda k: K.permute_dimensions(k, (0, 2, 1)))(z)
    z = Lambda(
        lambda k: K.reshape(k, (-1, highway_window * trainX1.shape[2])))(z)
    z = Dense(trainY.shape[1])(z)

    output = add([output, z])
    output = Activation('sigmoid')(output)

    model = Model(inputs=[input1, input2], outputs=output)
    return model
def call(self, x, mask=None):
    """Run the recurrent computation over ``x``.

    Args:
        x: Layer input, forwarded to ``get_initial_states``,
            ``get_constants`` and ``preprocess_input``.
        mask: Optional sequence of masks; only its first entry is selected.
            The mask is not forwarded to ``K.rnn``.

    Returns:
        The full output sequence if ``self.return_sequences`` is set,
        otherwise the last output.

    Raises:
        Exception: on the TensorFlow backend when the number of timesteps
            in ``self.input_spec[0].shape`` is not defined.
    """
    # `mask` defaults to None, which cannot be indexed.
    if mask is not None:
        mask = mask[0]

    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow' and not input_shape[1]:
        raise Exception('When using TensorFlow, you should define '
                        'explicitly the number of timesteps of '
                        'your sequences.\n'
                        'If your first layer is an Embedding, '
                        'make sure to pass it an "input_length" '
                        'argument. Otherwise, make sure '
                        'the first layer has '
                        'an "input_shape" or "batch_input_shape" '
                        'argument, including the time axis. '
                        'Found input shape at layer ' + self.name +
                        ': ' + str(input_shape))

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step,
                                         preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])

    if self.stateful:
        # Pair every stored state with the value computed by this call.
        self.updates = [(self.states[i], new_state)
                        for i, new_state in enumerate(states)]

    if self.return_sequences:
        return outputs
    return last_output
def call(self, x, mask=None):
    """Run the recurrent computation over ``x``.

    Args:
        x: Layer input, forwarded to ``get_initial_states``,
            ``get_constants`` and ``preprocess_input``.
        mask: Optional sequence of masks; only its first entry is selected.
            The mask is not forwarded to ``K.rnn``.

    Returns:
        The full output sequence if ``self.return_sequences`` is set,
        otherwise the last output.

    Raises:
        Exception: on the TensorFlow backend when the number of timesteps
            in ``self.input_spec[0].shape`` is not defined.
    """
    # `mask` defaults to None, which cannot be indexed.
    if mask is not None:
        mask = mask[0]

    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # note that the .build() method of subclasses MUST define
    # self.input_spec with a complete input shape.
    input_shape = self.input_spec[0].shape
    if K._BACKEND == 'tensorflow' and not input_shape[1]:
        raise Exception('When using TensorFlow, you should define '
                        'explicitly the number of timesteps of '
                        'your sequences.\n'
                        'If your first layer is an Embedding, '
                        'make sure to pass it an "input_length" '
                        'argument. Otherwise, make sure '
                        'the first layer has '
                        'an "input_shape" or "batch_input_shape" '
                        'argument, including the time axis. '
                        'Found input shape at layer ' + self.name +
                        ': ' + str(input_shape))

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(x)
    constants = self.get_constants(x)
    preprocessed_input = self.preprocess_input(x)

    last_output, outputs, states = K.rnn(self.step,
                                         preprocessed_input,
                                         initial_states,
                                         go_backwards=self.go_backwards,
                                         constants=constants,
                                         unroll=self.unroll,
                                         input_length=input_shape[1])

    if self.stateful:
        # Pair every stored state with the value computed by this call.
        self.updates = [(self.states[i], new_state)
                        for i, new_state in enumerate(states)]

    if self.return_sequences:
        return outputs
    return last_output
def call(self, inputs, mask=None):
    """Weight a context projection of ``inputs[0]`` by attention scores.

    ``inputs[0]`` is projected twice (context and memory). The memory
    projection is multiplied by ``inputs[1]`` repeated ``self.input_num``
    times and summed over axis 2; a softmax over those sums scales the
    context projection before ``self.activation`` is applied.

    Args:
        inputs: Indexable with the sequence input at 0 and the memory
            vector at 1.
        mask: Not used.
    """
    x, mem_vector = inputs[0], inputs[1]

    context = K.dot(x, self.W_c) + self.b_c  # context vector
    memory = K.dot(x, self.W_m) + self.b_m  # memory vector

    # Inner product with the question (memory vector), one per position.
    repeated_mem = K.repeat(mem_vector, self.input_num)
    scores = K.sum(memory * repeated_mem, axis=2, keepdims=False)

    weights = K.softmax(scores)
    weights = K.reshape(weights, (-1, self.input_num, 1))

    return self.activation(context * weights)
def call(self, inputs, mask=None):
    """Return the attention-weighted, activated context projection.

    The first input is projected into a context and a memory representation.
    Scores are the sums over axis 2 of the memory representation times the
    second input (repeated ``self.input_num`` times); their softmax scales
    the context representation.

    Args:
        inputs: Indexable; entry 0 is the sequence input, entry 1 the memory
            vector.
        mask: Not used.
    """
    sequence = inputs[0]
    question = inputs[1]

    c = K.dot(sequence, self.W_c) + self.b_c  # context vector
    m = K.dot(sequence, self.W_m) + self.b_m  # memory vector

    # Inner product with the question.
    tiled_question = K.repeat(question, self.input_num)
    m = K.sum(m * tiled_question, axis=2, keepdims=False)

    s = K.reshape(K.softmax(m), (-1, self.input_num, 1))
    ctx = self.activation(c * s)
    return ctx
def step(self, x, states):
    """Compute one LSTM step and gate the hidden state with attention.

    Per the original note, this method is called by the parent RNN and
    defines how each input propagates through the network; ``states[4]``
    holds the attention_vec contribution to the attention layer.

    Args:
        x: Input for this step.
        states: ``[h_tm1, c_tm1, B_U, B_W, attention]``.

    Returns:
        ``(h, [h, c])`` with ``h`` being the gated hidden state.
    """
    prev_h = states[0]
    prev_c = states[1]
    B_U = states[2]
    B_W = states[3]
    units = self.output_dim

    if self.consume_less == 'cpu':
        # x already holds the four gate pre-activations side by side.
        x_i = x[:, :units]
        x_f = x[:, units: 2 * units]
        x_c = x[:, 2 * units: 3 * units]
        x_o = x[:, 3 * units:]
    else:
        x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
        x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
        x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
        x_o = K.dot(x * B_W[3], self.W_o) + self.b_o

    i = self.inner_activation(x_i + K.dot(prev_h * B_U[0], self.U_i))
    f = self.inner_activation(x_f + K.dot(prev_h * B_U[1], self.U_f))
    candidate = self.activation(x_c + K.dot(prev_h * B_U[2], self.U_c))
    c = f * prev_c + i * candidate
    o = self.inner_activation(x_o + K.dot(prev_h * B_U[3], self.U_o))
    h = o * self.activation(c)

    attention = states[4]
    projected = K.dot(K.dot(x_i, self.W_i.T), self.U_a)
    m = self.attn_inner_activation(projected + attention + self.b_a)
    # Intuitively it makes more sense to use a sigmoid (was getting some NaN
    # problems which I think might have been caused by the exponential
    # function -> gradients blow up).
    s = self.attn_activation(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        h = h * K.repeat_elements(s, self.output_dim, axis=1)
    else:
        h = h * s

    return h, [h, c]
def eval_split(self, name, want, model_l, parts_train, parts_valid):
    """Evaluate a set of per-part models on their validation parts.

    One model is evaluated per index in
    ``range(max(self.output_dim, len(self.k_mean_list)))``.

    Without a solver (``self.solver`` falsy) the predictions of all parts are
    concatenated and scored with mean squared error (``output_dim == 1``),
    binary cross-entropy (``output_dim == 2``) or categorical cross-entropy.

    With a solver, three ``Solver`` instances are fitted per part (on the raw
    inputs, on the output of the model's second-to-last layer, and on both
    concatenated). The one with the lowest ``solver_eval`` loss is stored in
    ``self.solver_dict`` and supplies that part's predictions; the
    concatenated predictions are then scored with ``solver_eval``.

    Args:
        name: Prefix of the ``solver_dict`` key.
        want: String inserted in the ``solver_dict`` key after the index.
        model_l: Indexable; ``model_l[i][1]`` is the model for part ``i``.
        parts_train: Indexable of ``(X_train, y_train)`` pairs (solver
            branch only).
        parts_valid: Indexable of ``(X_valid, y_valid)`` pairs.

    Returns:
        The evaluated loss over all parts.
    """
    part_count = max(self.output_dim, len(self.k_mean_list))
    y_pred_l = []
    y_valid_l = []

    if not self.solver:
        for i in range(part_count):
            model = model_l[i][1]
            X_valid_tmp, y_valid_tmp = parts_valid[i]
            y_pred_l.append(model.predict(X_valid_tmp))
            y_valid_l.append(y_valid_tmp)
        y_pred = np.concatenate(y_pred_l)
        y_valid = np.concatenate(y_valid_l)

        # Scalar and binary outputs are flattened; multi-class outputs keep
        # one row per sample.
        if self.output_dim in (1, 2):
            target_shape = [-1]
        else:
            target_shape = [-1, self.output_dim]
        y_pred = tf.reshape(tf.convert_to_tensor(y_pred, np.float32),
                            target_shape)
        y_valid = tf.reshape(tf.convert_to_tensor(y_valid, np.float32),
                             target_shape)

        if self.output_dim == 1:
            loss = K.mean(keras.losses.mean_squared_error(y_valid, y_pred))
        elif self.output_dim == 2:
            loss = K.mean(K.binary_crossentropy(y_valid, y_pred), axis=-1)
        else:
            loss = K.mean(K.categorical_crossentropy(y_valid, y_pred),
                          axis=-1)

        # Close the session once the value is fetched; a bare tf.Session()
        # created per call was never released.
        with tf.Session() as session:
            return loss.eval(session=session)

    for i in range(part_count):
        model = model_l[i][1]
        X_train, y_train = parts_train[i]
        X_valid, y_valid = parts_valid[i]

        # Sub-model exposing the output of the second-to-last layer.
        dense_layer_model = Model(
            inputs=model.input,
            outputs=model.get_layer(index=-2).output)
        hidden = dense_layer_model.predict(X_train)
        hidden2 = dense_layer_model.predict(X_valid)
        mix = np.concatenate((X_train, hidden), 1)
        mix2 = np.concatenate((X_valid, hidden2), 1)

        # NOTE(review): predict() is called repeatedly on each solver; this
        # is kept as-is because its cost and side effects are not visible
        # here.
        my_solver_train = Solver(X_train, X_valid, y_train, y_valid,
                                 train=self.solver)
        loss_train = self.solver_eval(my_solver_train.predict()[0],
                                      my_solver_train.predict()[1])
        my_solver_hidden = Solver(hidden, hidden2, y_train, y_valid,
                                  train=self.solver)
        loss_hidden = self.solver_eval(my_solver_hidden.predict()[0],
                                       my_solver_hidden.predict()[1])
        my_solver_mix = Solver(mix, mix2, y_train, y_valid,
                               train=self.solver)
        loss_mix = self.solver_eval(my_solver_mix.predict()[0],
                                    my_solver_mix.predict()[1])

        loss_min = min(loss_train, loss_hidden, loss_mix)
        key = name + '_' + str(i) + want + '_' + str(len(model.layers))
        if loss_min == loss_train:
            self.solver_dict[key] = (my_solver_train, 'train')
            y_valid_tmp, y_pred_tmp = my_solver_train.predict()
        elif loss_min == loss_hidden:
            self.solver_dict[key] = (my_solver_hidden, 'hidden')
            y_valid_tmp, y_pred_tmp = my_solver_hidden.predict()
        else:
            self.solver_dict[key] = (my_solver_mix, 'mix')
            y_valid_tmp, y_pred_tmp = my_solver_mix.predict()

        y_pred_l.append(y_pred_tmp)
        y_valid_l.append(y_valid_tmp)

    y_pred = np.concatenate(y_pred_l)
    y_valid = np.concatenate(y_valid_l)
    return self.solver_eval(y_valid, y_pred)
def cosine(x1, x2):
    """Cosine similarity of ``x1`` and ``x2`` along the last axis.

    A constant of 1e-7 is added to the product of the norms before the
    division.
    """
    dot_product = K.sum(x1 * x2, axis=-1)
    squared_norms = K.sum(x1 * x1, axis=-1) * K.sum(x2 * x2, axis=-1)
    return dot_product / (K.sqrt(squared_norms) + 0.0000001)
def step(self, x, states):
    """Compute one LSTM step and gate the hidden state with attention.

    Per the original note, this method is called by the parent RNN and
    defines how each input propagates through the network; ``states[4]``
    holds the attention_vec contribution to the attention layer.

    Args:
        x: Input for this step.
        states: ``[h_tm1, c_tm1, B_U, B_W, attention]``.

    Returns:
        ``(h, [h, c])`` with ``h`` being the gated hidden state.
    """
    h_prev, c_prev, B_U, B_W = states[0], states[1], states[2], states[3]
    n = self.output_dim

    if self.consume_less == 'cpu':
        # x already holds the four gate pre-activations side by side.
        x_i, x_f, x_c, x_o = (x[:, :n], x[:, n:2 * n],
                              x[:, 2 * n:3 * n], x[:, 3 * n:])
    else:
        x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
        x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
        x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
        x_o = K.dot(x * B_W[3], self.W_o) + self.b_o

    gate = self.inner_activation
    i = gate(x_i + K.dot(h_prev * B_U[0], self.U_i))
    f = gate(x_f + K.dot(h_prev * B_U[1], self.U_f))
    c = f * c_prev + i * self.activation(
        x_c + K.dot(h_prev * B_U[2], self.U_c))
    o = gate(x_o + K.dot(h_prev * B_U[3], self.U_o))
    h = o * self.activation(c)

    attention = states[4]
    m = self.attn_inner_activation(
        K.dot(K.dot(x_i, self.W_i.T), self.U_a) + attention + self.b_a)
    # Intuitively it makes more sense to use a sigmoid (was getting some NaN
    # problems which I think might have been caused by the exponential
    # function -> gradients blow up).
    s = self.attn_activation(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        scale = K.repeat_elements(s, self.output_dim, axis=1)
    else:
        scale = s
    h = h * scale

    return h, [h, c]
def eval_model(self, name, model, X_train, X_valid, y_train, y_valid):
    """Evaluate ``model`` on the validation data and return its loss.

    Without a solver (``self.solver`` falsy) the model's predictions on
    ``X_valid`` are scored with mean squared error (``output_dim == 1``),
    binary cross-entropy (``output_dim == 2``) or categorical cross-entropy.

    With a solver, three ``Solver`` instances are fitted (on the raw inputs,
    on the output of the model's second-to-last layer, and on both
    concatenated). The one with the lowest ``solver_eval`` loss is stored in
    ``self.solver_dict`` under ``name + '_' + <number of layers>`` and its
    loss is returned.

    Args:
        name: Prefix of the ``solver_dict`` key.
        model: Model to evaluate.
        X_train: Training inputs (solver branch only).
        X_valid: Validation inputs.
        y_train: Training targets (solver branch only).
        y_valid: Validation targets.

    Returns:
        The evaluated loss.
    """
    if not self.solver:
        y_pred = model.predict(X_valid)

        # Scalar and binary outputs are flattened; multi-class outputs keep
        # one row per sample.
        if self.output_dim in (1, 2):
            target_shape = [-1]
        else:
            target_shape = [-1, self.output_dim]
        y_pred = tf.reshape(tf.convert_to_tensor(y_pred, np.float32),
                            target_shape)
        y_valid = tf.reshape(tf.convert_to_tensor(y_valid, np.float32),
                             target_shape)

        if self.output_dim == 1:
            loss = K.mean(keras.losses.mean_squared_error(y_valid, y_pred))
        elif self.output_dim == 2:
            loss = K.mean(K.binary_crossentropy(y_valid, y_pred), axis=-1)
        else:
            loss = K.mean(K.categorical_crossentropy(y_valid, y_pred),
                          axis=-1)

        # Close the session once the value is fetched; a bare tf.Session()
        # created per call was never released.
        with tf.Session() as session:
            return loss.eval(session=session)

    # Sub-model exposing the output of the second-to-last layer.
    dense_layer_model = Model(inputs=model.input,
                              outputs=model.get_layer(index=-2).output)
    hidden = dense_layer_model.predict(X_train)
    hidden2 = dense_layer_model.predict(X_valid)
    mix = np.concatenate((X_train, hidden), 1)
    mix2 = np.concatenate((X_valid, hidden2), 1)

    # NOTE(review): predict() is called twice per solver; this is kept as-is
    # because its cost and side effects are not visible here.
    my_solver_train = Solver(X_train, X_valid, y_train, y_valid,
                             train=self.solver)
    loss_train = self.solver_eval(my_solver_train.predict()[0],
                                  my_solver_train.predict()[1])
    my_solver_hidden = Solver(hidden, hidden2, y_train, y_valid,
                              train=self.solver)
    loss_hidden = self.solver_eval(my_solver_hidden.predict()[0],
                                   my_solver_hidden.predict()[1])
    my_solver_mix = Solver(mix, mix2, y_train, y_valid, train=self.solver)
    loss_mix = self.solver_eval(my_solver_mix.predict()[0],
                                my_solver_mix.predict()[1])

    loss_min = min(loss_train, loss_hidden, loss_mix)
    key = name + '_' + str(len(model.layers))
    if loss_min == loss_train:
        self.solver_dict[key] = (my_solver_train, 'train')
    elif loss_min == loss_hidden:
        self.solver_dict[key] = (my_solver_hidden, 'hidden')
    else:
        self.solver_dict[key] = (my_solver_mix, 'mix')
    return loss_min
from keras.models import Sequential, K
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.layers.normalization import BatchNormalization
from sklearn.model_selection import train_test_split
import numpy as np
from numpy import array, genfromtxt
import pickle
import os
import pandas as pd

# Clear the Keras backend session before anything is built.
K.clear_session()

# Root directory that is scanned below; every entry in it is listed as a
# directory of CSV files.
DIR = '/home/rigas/Downloads/genres/spectograms'
X_train = []
y_train = []
data = []

# Load every CSV file found one level below DIR. The name of the containing
# entry is recorded as the label of each loaded array.
for file in os.listdir(DIR):
    file_path = os.path.join(DIR, file)
    for csv in os.listdir(file_path):
        csv_path = os.path.join(file_path, csv)
        #data = pd.read_csv(csv_path, header=None)
        data = genfromtxt(csv_path, delimiter=',')
        X_train.append(data)
        y_train.append(file)

#X_train = np.array(X_train)
#X_train = X_train.astype('float32')
# Report the shape of the collected samples and of the last array loaded.
print(np.shape(X_train))
print(np.shape(data))

# Building the model
model = Sequential()