# Imports assumed for this listing (astroNN provides MCDropout and the
# mse_*_wrapper loss factories; everything else is standard tf.keras).
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (Input, Conv2D, Conv2DTranspose, Concatenate,
                                     Lambda, MaxPooling2D, UpSampling2D)
from tensorflow.keras.models import Model
from astroNN.nn.layers import MCDropout
from astroNN.nn.losses import mse_lin_wrapper, mse_var_wrapper


def model_regression_dropout_var(dropout_rate=0.00, activation='relu', start_ch=32,
                                 inc_rate=2., depth=2, batchnorm=False, maxpool=True,
                                 upconv=True, residual=False):
    # UNet: code from https://github.com/pietz/unet-keras
    def _conv_block(m, dim, acti, bn, res, do=0):
        n = Conv2D(dim, 3, padding='same', kernel_initializer='he_normal', activation=acti)(m)
        n = MCDropout(do)(n)
        n = Conv2D(dim, 3, padding='same', kernel_initializer='he_normal', activation=acti)(n)
        n = MCDropout(do)(n)
        return Concatenate()([m, n]) if res else n

    def _level_block(m, dim, depth, inc, acti, do, bn, mp, up, res):
        if depth > 0:
            n = _conv_block(m, dim, acti, bn, res, do)
            m = MaxPooling2D()(n) if mp else Conv2D(dim, 3, strides=2, padding='same')(n)
            m = _level_block(m, int(inc * dim), depth - 1, inc, acti, do, bn, mp, up, res)
            if up:
                m = UpSampling2D()(m)
                m = Conv2D(dim, 2, activation=acti, padding='same')(m)
            else:
                m = Conv2DTranspose(dim, 3, strides=2, activation=acti, padding='same')(m)
            n = Concatenate()([n, m])
            m = _conv_block(n, dim, acti, bn, res, do)
        else:
            m = _conv_block(m, dim, acti, bn, res, do)
        return m

    def _level_block2(m, dim, depth, inc, acti, do, bn, mp, up, res):
        # a plain stack of four conv blocks at a single resolution (depth is ignored)
        m = _conv_block(m, dim, acti, bn, res, do)
        m = _conv_block(m, dim, acti, bn, res, do)
        m = _conv_block(m, dim, acti, bn, res, do)
        m = _conv_block(m, dim, acti, bn, res, do)
        return m

    input_tensor = Input(shape=(None, None, 1), name='input')
    labels_err_tensor = Input(shape=(None, None, 1), name='label_err')

    # good old output
    o1 = _level_block(input_tensor, start_ch, depth, inc_rate, activation, dropout_rate,
                      batchnorm, maxpool, upconv, residual)
    linear_output = Conv2D(1, 1, activation="linear", name='linear_output')(o1)

    # data-dependent (aleatoric) uncertainty output
    o2 = _level_block2(input_tensor, start_ch, 0, inc_rate, activation, dropout_rate,
                       batchnorm, maxpool, upconv, residual)
    o2 = Conv2D(1, 1, activation="linear")(o2)
    # force the log-variance to start near -12, i.e. close to the solution;
    # optimizing directly in log-space is otherwise unstable at the start
    variance_output = Lambda(lambda x: -K.abs(x) - 12, name='variance_output')(o2)

    model = Model(inputs=[input_tensor, labels_err_tensor],
                  outputs=[variance_output, linear_output])
    model_prediction = Model(inputs=input_tensor,
                             outputs=[variance_output, linear_output])

    mse_var_ext = mse_var_wrapper(linear_output, labels_err_tensor)
    mse_lin_ext = mse_lin_wrapper(variance_output, labels_err_tensor)

    return model, model_prediction, mse_lin_ext, mse_var_ext
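# A minimal training sketch for the model above, assuming hypothetical arrays
# `imgs`, `imgs_err` and `targets`; the wrapped losses are keyed by the named
# output layers, and both heads are fitted against the same targets because
# each loss wrapper pulls what it needs from the other head's tensor and from
# the label errors captured at construction time.
unet_model, unet_prediction, mse_lin_ext, mse_var_ext = model_regression_dropout_var()
unet_model.compile(optimizer='adam',
                   loss={'linear_output': mse_lin_ext, 'variance_output': mse_var_ext})
unet_model.fit(x={'input': imgs, 'label_err': imgs_err},
               y={'linear_output': targets, 'variance_output': targets})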
# Imports assumed for this method.
import tensorflow as tf
from packaging import version
from tensorflow.python.keras.engine import data_adapter
from astroNN.nn.losses import (bayesian_binary_crossentropy_var_wrapper,
                               bayesian_binary_crossentropy_wrapper,
                               bayesian_categorical_crossentropy_var_wrapper,
                               bayesian_categorical_crossentropy_wrapper)


def custom_train_step(self, data):
    """
    Custom training logic for the two-headed Bayesian models: build the output
    loss and the variance loss explicitly for the task, then apply gradients.

    :param data: a batch as dispatched by ``Model.fit()``
    :return: dict mapping metric names to their current values
    """
    data = data_adapter.expand_1d(data)
    x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)

    with tf.GradientTape() as tape:
        y_pred = self.keras_model(x, training=True)
        # the compiled loss is the dummy zeros_loss, so this term only carries
        # the regularization penalties (e.g. L2) registered on the model
        reg_loss = self.keras_model.compiled_loss(
            y, y_pred, sample_weight, regularization_losses=self.keras_model.losses)
        if self.task == 'regression':
            variance_loss = mse_var_wrapper(y_pred[0], x['labels_err'])
            output_loss = mse_lin_wrapper(y_pred[1], x['labels_err'])
        elif self.task == 'classification':
            output_loss = bayesian_categorical_crossentropy_wrapper(y_pred[1])
            variance_loss = bayesian_categorical_crossentropy_var_wrapper(y_pred[0])
        elif self.task == 'binary_classification':
            output_loss = bayesian_binary_crossentropy_wrapper(y_pred[1])
            variance_loss = bayesian_binary_crossentropy_var_wrapper(y_pred[0])
        else:
            raise RuntimeError(
                'Only "regression", "classification" and "binary_classification" are supported')
        loss = output_loss(y['output'], y_pred[0]) + variance_loss(
            y['variance_output'], y_pred[1]) + reg_loss

    # apply gradients here
    if version.parse(tf.__version__) >= version.parse("2.4.0"):
        self.keras_model.optimizer.minimize(
            loss, self.keras_model.trainable_variables, tape=tape)
    else:
        # private TF API, only reachable on TF < 2.4
        from tensorflow.python.keras.engine import training as training_module
        training_module._minimize(self.keras_model.distribute_strategy, tape,
                                  self.keras_model.optimizer, loss,
                                  self.keras_model.trainable_variables)

    self.keras_model.compiled_metrics.update_state(y, y_pred, sample_weight)
    return {m.name: m.result() for m in self.keras_model.metrics}
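# A minimal sketch of the heteroscedastic regression loss that the
# mse_lin_wrapper / mse_var_wrapper factories are built around (following
# Kendall & Gal 2017); the exact astroNN implementation may differ in detail,
# e.g. in how the known label errors enter the total variance.
import tensorflow as tf

def robust_mse_sketch(y_true, y_pred, log_var, labels_err):
    # total variance = predicted aleatoric variance (stored in log space)
    # plus the known measurement variance of the labels
    total_var = tf.exp(log_var) + tf.square(labels_err)
    # negative Gaussian log-likelihood, up to an additive constant
    return tf.reduce_mean(0.5 * tf.square(y_true - y_pred) / total_var
                          + 0.5 * tf.math.log(total_var))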
def model(self):
    input_tensor = Input(shape=self._input_shape, name='input')
    labels_err_tensor = Input(shape=(self._labels_shape,), name='labels_err')

    cnn_layer_1 = Conv1D(kernel_initializer=self.initializer,
                         padding="same",
                         filters=self.num_filters[0],
                         kernel_size=self.filter_len,
                         kernel_regularizer=regularizers.l2(self.l2))(input_tensor)
    activation_1 = Activation(activation=self.activation)(cnn_layer_1)
    dropout_1 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(activation_1)
    cnn_layer_2 = Conv1D(kernel_initializer=self.initializer,
                         padding="same",
                         filters=self.num_filters[1],
                         kernel_size=self.filter_len,
                         kernel_regularizer=regularizers.l2(self.l2))(dropout_1)
    activation_2 = Activation(activation=self.activation)(cnn_layer_2)
    maxpool_1 = MaxPooling1D(pool_size=self.pool_length)(activation_2)
    flattener = Flatten()(maxpool_1)
    dropout_2 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(flattener)
    # the dense layers stay linear here; the nonlinearity is applied once by
    # the Activation layers that follow
    layer_3 = Dense(units=self.num_hidden[0],
                    kernel_regularizer=regularizers.l2(self.l2),
                    kernel_initializer=self.initializer)(dropout_2)
    activation_3 = Activation(activation=self.activation)(layer_3)
    dropout_3 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(activation_3)
    layer_4 = Dense(units=self.num_hidden[1],
                    kernel_regularizer=regularizers.l2(self.l2),
                    kernel_initializer=self.initializer)(dropout_3)
    activation_4 = Activation(activation=self.activation)(layer_4)

    # the task-dependent last-layer activation is folded into the output head
    output = Dense(units=self._labels_shape,
                   activation=self._last_layer_activation,
                   name='output')(activation_4)
    variance_output = Dense(units=self._labels_shape,
                            activation='linear',
                            name='variance_output')(activation_4)

    model = Model(inputs=[input_tensor, labels_err_tensor],
                  outputs=[output, variance_output])
    # new astroNN high-performance dropout variational inference on GPU expects a single output
    model_prediction = Model(inputs=[input_tensor],
                             outputs=concatenate([output, variance_output]))

    if self.task == 'regression':
        variance_loss = mse_var_wrapper(output, labels_err_tensor)
        output_loss = mse_lin_wrapper(variance_output, labels_err_tensor)
    elif self.task == 'classification':
        output_loss = bayesian_categorical_crossentropy_wrapper(variance_output)
        variance_loss = bayesian_categorical_crossentropy_var_wrapper(output)
    elif self.task == 'binary_classification':
        output_loss = bayesian_binary_crossentropy_wrapper(variance_output)
        variance_loss = bayesian_binary_crossentropy_var_wrapper(output)
    else:
        raise RuntimeError(
            'Only "regression", "classification" and "binary_classification" are supported')

    return model, model_prediction, output_loss, variance_loss
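# A Monte-Carlo dropout inference sketch, assuming `model_prediction` is the
# single-output model returned by the method above and `x_test` is a
# hypothetical batch of spectra; MCDropout stays active at test time, so
# repeated forward passes sample the model posterior. The column split follows
# concatenate([output, variance_output]): labels first, then log-variances.
import numpy as np

mc = np.stack([model_prediction.predict(x_test) for _ in range(100)])
n_labels = mc.shape[-1] // 2
preds, log_vars = mc[..., :n_labels], mc[..., n_labels:]
mean_pred = preds.mean(axis=0)
# total uncertainty = epistemic (spread over passes) + aleatoric (predicted)
total_var = preds.var(axis=0) + np.exp(log_vars).mean(axis=0)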
def model(self):
    input_tensor = Input(shape=self._input_shape, name='input')
    input_tensor_flattened = Flatten()(input_tensor)
    labels_err_tensor = Input(shape=(self._labels_shape,), name='labels_err')

    # elements in the order their heads appear below; Fe comes from the
    # full-spectrum branch, so it is not censored here
    elements = ['C', 'C1', 'N', 'O', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'K',
                'Ca', 'Ti', 'Ti2', 'V', 'Cr', 'Mn', 'Co', 'Ni']

    # slice the spectra to censor out the regions that carry no information
    # for each element
    censored_inputs = {
        elem: BoolMask(aspcap_mask(elem, dr=14),
                       name=f'{elem}_Mask')(input_tensor_flattened)
        for elem in elements
    }

    # first dense layer on each censored spectrum; C and N have wide molecular
    # windows, so their branches get 8x the neurons
    elem_dense = {}
    for elem in elements:
        units = self.num_hidden[2] * 8 if elem in ('C', 'N') else self.num_hidden[2]
        elem_dense[elem] = MCDropout(self.dropout_rate, disable=self.disable_dropout)(
            Dense(units=units,
                  kernel_initializer=self.initializer,
                  name=f'{elem.lower()}_dense',
                  activation=self.activation,
                  kernel_regularizer=regularizers.l2(self.l2))(censored_inputs[elem]))

    # second dense layer on each censored branch (C and N again get 4x width)
    elem_dense_2 = {}
    for elem in elements:
        units = self.num_hidden[3] * 4 if elem in ('C', 'N') else self.num_hidden[3]
        elem_dense_2[elem] = MCDropout(self.dropout_rate, disable=self.disable_dropout)(
            Dense(units=units,
                  kernel_initializer=self.initializer,
                  activation=self.activation,
                  name=f'{elem.lower()}_dense_2')(elem_dense[elem]))

    # basically the same as the ApogeeBCNN structure, run on the full spectrum
    cnn_layer_1 = Conv1D(kernel_initializer=self.initializer,
                         padding="same",
                         filters=self.num_filters[0],
                         kernel_size=self.filter_len,
                         kernel_regularizer=regularizers.l2(self.l2))(input_tensor)
    activation_1 = Activation(activation=self.activation)(cnn_layer_1)
    dropout_1 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(activation_1)
    cnn_layer_2 = Conv1D(kernel_initializer=self.initializer,
                         padding="same",
                         filters=self.num_filters[1],
                         kernel_size=self.filter_len,
                         kernel_regularizer=regularizers.l2(self.l2))(dropout_1)
    activation_2 = Activation(activation=self.activation)(cnn_layer_2)
    maxpool_1 = MaxPooling1D(pool_size=self.pool_length)(activation_2)
    flattener = Flatten()(maxpool_1)
    dropout_2 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(flattener)
    layer_3 = Dense(units=self.num_hidden[0],
                    kernel_regularizer=regularizers.l2(self.l2),
                    kernel_initializer=self.initializer)(dropout_2)
    activation_3 = Activation(activation=self.activation)(layer_3)
    dropout_3 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(activation_3)
    layer_4 = Dense(units=self.num_hidden[1],
                    kernel_regularizer=regularizers.l2(self.l2),
                    kernel_initializer=self.initializer)(dropout_3)
    activation_4 = Activation(activation=self.activation)(layer_4)

    # full-spectrum heads for Teff, logg, Fe and their predictive variances
    teff_output = Dense(units=1)(activation_4)
    logg_output = Dense(units=1)(activation_4)
    fe_output = Dense(units=1)(activation_4)
    # stop gradients so the element heads cannot backpropagate into these three
    old_3_output_wo_grad = StopGrad()(concatenate(
        [teff_output, logg_output, fe_output]))
    teff_output_var = Dense(units=1)(activation_4)
    logg_output_var = Dense(units=1)(activation_4)
    fe_output_var = Dense(units=1)(activation_4)

    aux_fullspec = Dense(units=self.num_hidden[4],
                         kernel_initializer=self.initializer,
                         kernel_constraint=MaxNorm(self.maxnorm),
                         name='aux_fullspec')(activation_4)
    fullspec_hidden = concatenate([aux_fullspec, old_3_output_wo_grad])

    # final answer and predictive-uncertainty heads, one per element, each
    # seeing its censored branch plus the shared full-spectrum features
    elem_concat = {
        elem: Dense(units=1, name=f'{elem.lower()}_concat')(
            concatenate([elem_dense_2[elem], fullspec_hidden]))
        for elem in elements
    }
    elem_concat_var = {
        elem: Dense(units=1, name=f'{elem.lower()}_concat_var')(
            concatenate([elem_dense_2[elem], fullspec_hidden]))
        for elem in elements
    }

    # concatenate the answers in label order (Fe slots in between Mn and Co)
    output = concatenate(
        [teff_output, logg_output]
        + [elem_concat[elem] for elem in elements[:17]]   # C through Mn
        + [fe_output]
        + [elem_concat[elem] for elem in elements[17:]],  # Co, Ni
        name='output')

    # concatenate the predictive uncertainties in the same order
    variance_output = concatenate(
        [teff_output_var, logg_output_var]
        + [elem_concat_var[elem] for elem in elements[:17]]
        + [fe_output_var]
        + [elem_concat_var[elem] for elem in elements[17:]],
        name='variance_output')

    model = Model(inputs=[input_tensor, labels_err_tensor],
                  outputs=[output, variance_output])
    # new astroNN high-performance dropout variational inference on GPU expects a single output
    model_prediction = Model(inputs=input_tensor,
                             outputs=concatenate([output, variance_output]))

    variance_loss = mse_var_wrapper(output, labels_err_tensor)
    output_loss = mse_lin_wrapper(variance_output, labels_err_tensor)

    return model, model_prediction, output_loss, variance_loss
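# A small sketch of the censoring idea used above, assuming astroNN's
# aspcap_mask returns a boolean array over the APOGEE DR14 wavelength grid:
# each element head only ever sees the pixels inside that element's ASPCAP
# spectral windows (`spectrum` is a hypothetical continuum-normalized array).
import numpy as np
from astroNN.apogee import aspcap_mask

mask = aspcap_mask("Mg", dr=14)       # True where Mg windows are defined
censored = spectrum[..., mask]        # what the Mg_Mask BoolMask layer passes on
print(mask.sum(), "of", mask.size, "pixels survive the Mg censoring")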
def model(self):
    input_tensor = Input(shape=self._input_shape, name='input')  # training data
    labels_err_tensor = Input(shape=(self._labels_shape,), name='labels_err')

    # extract the spectra from the input data and expand_dims for convolution
    spectra = Lambda(lambda x: tf.expand_dims(x, axis=-1))(
        BoolMask(self.specmask())(input_tensor))

    # value to de-normalize the magnitude
    app_mag = BoolMask(self.magmask())(input_tensor)
    denorm_mag = DeNormAdd(self.input_mean[self.magmask()])(app_mag)
    inv_pow_mag = Lambda(lambda mag: tf.pow(10., tf.multiply(-0.2, mag)))(denorm_mag)

    # data to infer the Gaia DR2 parallax offset
    gaia_aux_data = BoolMask(self.gaia_aux_mask())(input_tensor)
    gaia_aux_hidden = MCDropout(self.dropout_rate, disable=self.disable_dropout)(
        Dense(units=18,
              kernel_regularizer=regularizers.l2(self.l2),
              kernel_initializer=self.initializer,
              activation='tanh')(gaia_aux_data))
    offset = Dense(units=1,
                   kernel_initializer=self.initializer,
                   activation='tanh',
                   name='offset_output')(gaia_aux_hidden)

    # good old NN takes the spectra and outputs fakemag
    cnn_layer_1 = Conv1D(kernel_initializer=self.initializer,
                         padding="same",
                         filters=self.num_filters[0],
                         kernel_size=self.filter_len,
                         kernel_regularizer=regularizers.l2(self.l2))(spectra)
    activation_1 = Activation(activation=self.activation)(cnn_layer_1)
    dropout_1 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(activation_1)
    cnn_layer_2 = Conv1D(kernel_initializer=self.initializer,
                         padding="same",
                         filters=self.num_filters[1],
                         kernel_size=self.filter_len,
                         kernel_regularizer=regularizers.l2(self.l2))(dropout_1)
    activation_2 = Activation(activation=self.activation)(cnn_layer_2)
    maxpool_1 = MaxPooling1D(pool_size=self.pool_length)(activation_2)
    flattener = Flatten()(maxpool_1)
    dropout_2 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(flattener)
    layer_3 = Dense(units=self.num_hidden[0],
                    kernel_regularizer=regularizers.l2(self.l2),
                    kernel_initializer=self.initializer)(dropout_2)
    activation_3 = Activation(activation=self.activation)(layer_3)
    dropout_3 = MCDropout(self.dropout_rate, disable=self.disable_dropout)(activation_3)
    layer_4 = Dense(units=self.num_hidden[1],
                    kernel_regularizer=regularizers.l2(self.l2),
                    kernel_initializer=self.initializer)(dropout_3)
    activation_4 = Activation(activation=self.activation)(layer_4)
    fakemag_output = Dense(units=self._labels_shape,
                           activation='softplus',
                           name='fakemag_output')(activation_4)
    fakemag_variance_output = Dense(units=self._labels_shape,
                                    activation='linear',
                                    name='fakemag_variance_output')(activation_4)

    # multiply by a pre-determined de-normalization factor such that the fakemag
    # std is approximately 1 for the Sloan APOGEE population
    _fakemag_denorm = Lambda(lambda x: tf.multiply(x, 68.))(fakemag_output)
    _fakemag_var_denorm = Lambda(lambda x: tf.add(x, tf.math.log(68.)))(
        fakemag_variance_output)
    _fakemag_parallax = Multiply()([_fakemag_denorm, inv_pow_mag])

    # output parallax
    output = Add(name='output')([_fakemag_parallax, offset])
    variance_output = Lambda(
        lambda x: tf.math.log(tf.abs(tf.multiply(x[2], tf.divide(tf.exp(x[0]), x[1])))),
        name='variance_output')([
            fakemag_variance_output, fakemag_output, _fakemag_parallax
        ])

    model = Model(inputs=[input_tensor, labels_err_tensor],
                  outputs=[output, variance_output])
    # new astroNN high-performance dropout variational inference on GPU expects a
    # single output; the model trains on parallax, but at test time we want fakemag
    model_prediction = Model(inputs=[input_tensor],
                             outputs=concatenate([_fakemag_denorm, _fakemag_var_denorm]))

    variance_loss = mse_var_wrapper(output, labels_err_tensor)
    output_loss = mse_lin_wrapper(variance_output, labels_err_tensor)

    return model, model_prediction, output_loss, variance_loss
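# The fakemag -> parallax relation the graph above encodes, as plain NumPy
# (hypothetical inputs); fakemag is a luminosity-like quantity, so multiplying
# by 10**(-0.2 * m_apparent), the inv_pow_mag node, yields a parallax.
import numpy as np

def fakemag_to_parallax(fakemag, app_mag):
    # mirrors Multiply()([_fakemag_denorm, inv_pow_mag]) in the model graph
    return fakemag * 10.0 ** (-0.2 * app_mag)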
def compile(self,
            optimizer=None,
            loss=None,
            metrics=None,
            weighted_metrics=None,
            loss_weights=None,
            sample_weight_mode=None):
    if optimizer is not None:
        self.optimizer = optimizer
    elif self.optimizer is None or self.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=self.lr,
                              beta_1=self.beta_1,
                              beta_2=self.beta_2,
                              epsilon=self.optimizer_epsilon,
                              decay=0.0)
    if metrics is not None:
        self.metrics = metrics

    # pick a default last-layer activation for the task if none was set
    if self.task == 'regression':
        if self._last_layer_activation is None:
            self._last_layer_activation = 'linear'
    elif self.task == 'classification':
        if self._last_layer_activation is None:
            self._last_layer_activation = 'softmax'
    elif self.task == 'binary_classification':
        if self._last_layer_activation is None:
            self._last_layer_activation = 'sigmoid'
    else:
        raise RuntimeError(
            'Only "regression", "classification" and "binary_classification" are supported')

    self.keras_model, self.keras_model_predict, self.output_loss, self.variance_loss = self.model()

    if self.task == 'regression':
        self._output_loss = lambda predictive, labelerr: mse_lin_wrapper(
            predictive, labelerr)
    elif self.task == 'classification':
        self._output_loss = lambda predictive, labelerr: bayesian_categorical_crossentropy_wrapper(
            predictive)
    elif self.task == 'binary_classification':
        self._output_loss = lambda predictive, labelerr: bayesian_binary_crossentropy_wrapper(
            predictive)
    else:
        raise RuntimeError(
            'Only "regression", "classification" and "binary_classification" are supported')

    # compile with an all-zero loss as a dummy loss; the real losses are
    # computed inside the custom training step
    if self.task == 'regression':
        self.metrics = [mean_absolute_error, mean_error] if not self.metrics else self.metrics
        self.keras_model.compile(optimizer=self.optimizer,
                                 loss=zeros_loss,
                                 metrics=self.metrics,
                                 weighted_metrics=weighted_metrics,
                                 sample_weight_mode=sample_weight_mode)
    elif self.task == 'classification':
        self.metrics = [categorical_accuracy] if not self.metrics else self.metrics
        self.keras_model.compile(optimizer=self.optimizer,
                                 loss=zeros_loss,
                                 metrics={'output': self.metrics},
                                 weighted_metrics=weighted_metrics,
                                 sample_weight_mode=sample_weight_mode)
    elif self.task == 'binary_classification':
        self.metrics = [binary_accuracy] if not self.metrics else self.metrics
        self.keras_model.compile(optimizer=self.optimizer,
                                 loss=zeros_loss,
                                 metrics={'output': self.metrics},
                                 weighted_metrics=weighted_metrics,
                                 sample_weight_mode=sample_weight_mode)

    # inject the custom training step if needed: calling the method with no
    # arguments raises NotImplementedError when only the abstract stub exists,
    # but TypeError (missing `data`) when a concrete override is present
    try:
        self.custom_train_step()
    except NotImplementedError:
        pass
    except TypeError:
        self.keras_model.train_step = self.custom_train_step

    # inject the custom testing step if needed, using the same probe
    try:
        self.custom_test_step()
    except NotImplementedError:
        pass
    except TypeError:
        self.keras_model.test_step = self.custom_test_step

    return None
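# A self-contained sketch of the injection probe used in compile() above:
# calling the bound method with no arguments raises NotImplementedError when
# only the abstract stub exists, but TypeError (missing `data`) when a
# concrete override is present, which tells us whether to replace Keras'
# train_step. Class names here are illustrative only.
class Base:
    def custom_train_step(self, data=None):
        raise NotImplementedError

class WithCustomStep(Base):
    def custom_train_step(self, data):  # requires `data`, so the probe raises TypeError
        return {}

for net in (Base(), WithCustomStep()):
    try:
        net.custom_train_step()
        inject = False
    except NotImplementedError:
        inject = False
    except TypeError:
        inject = True  # concrete implementation present -> inject it
    print(type(net).__name__, 'inject custom step:', inject)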