def equations(self, state, t):
    # Membrane-potential / gating-variable ODEs; the structure matches the Morris-Lecar model,
    # so the parameter indices are annotated below with that (inferred) meaning:
    # [0]=E_K, [1]=g_K, [2]=E_Ca, [3]=g_Ca, [4]=E_leak, [5]=g_leak,
    # [6]=phi, [7]=V1, [8]=V2, [9]=V3, [10]=V4, [11]=I_ext.
    p = self.model_parameters
    v, n = tf.unstack(state)
    m_inf = 0.5 * (1 + tf.tanh((v - p[7]) / p[8]))    # steady-state Ca-channel activation
    n_inf = 0.5 * (1 + tf.tanh((v - p[9]) / p[10]))   # steady-state K-channel activation
    tau_n = 1 / tf.cosh((v - p[9]) / (2 * p[10]))     # K-channel gating time constant
    dv = (-p[3] * m_inf * (v - p[2])
          - p[1] * n * (v - p[0])
          - p[5] * (v - p[4])
          + p[11])
    dn = p[6] * (n_inf - n) / tau_n
    return tf.stack([dv, dn])
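A function with this (state, t) signature plugs directly into a fixed-step integrator. The helper below is a minimal illustrative sketch; rk4_step and the example state are not part of the snippet above:

import tensorflow as tf

def rk4_step(equations, state, t, dt):
    # One classical Runge-Kutta 4 update for d(state)/dt = equations(state, t).
    k1 = equations(state, t)
    k2 = equations(state + 0.5 * dt * k1, t + 0.5 * dt)
    k3 = equations(state + 0.5 * dt * k2, t + 0.5 * dt)
    k4 = equations(state + dt * k3, t + dt)
    return state + (dt / 6.0) * (k1 + 2.0 * k2 + 2.0 * k3 + k4)

# e.g. state = tf.constant([v0, n0]); state = rk4_step(model.equations, state, t, dt)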
def _forward_log_det_jacobian(self, x):
    # y = sinh((arcsinh(x) + skewness) * tailweight)
    # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
    # dy/dx
    # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)

    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
    return (tf.log(
        tf.cosh((tf.asinh(x) + self.skewness) * self.tailweight)
        # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
        # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
        / _sqrtx2p1(x)) + tf.log(self.tailweight))
def _inverse_log_det_jacobian(self, y):
    # x = sinh(arcsinh(y) / tailweight - skewness)
    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
    # dx/dy
    # = cosh(arcsinh(y) / tailweight - skewness)
    #   / (tailweight * sqrt(y**2 + 1))

    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(y**2 + 1).
    return (tf.log(
        tf.cosh(tf.asinh(y) / self.tailweight - self.skewness)
        # TODO(srvasude): Consider using cosh(arcsinh(y)) in cases
        # where (arcsinh(y) / tailweight) - skewness ~= arcsinh(y).
        / _sqrtx2p1(y)) - tf.log(self.tailweight))
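For any bijector, the forward and inverse log-det-Jacobians above should cancel at corresponding points. A minimal numerical sanity check, using a naive stand-in for _sqrtx2p1 (the module's real helper is written elsewhere to avoid overflow for large arguments) and the same TF1-style ops as the snippets:

import tensorflow as tf

def _sqrtx2p1(x):
    # Illustrative stand-in for the module helper: sqrt(x**2 + 1).
    return tf.sqrt(tf.square(x) + 1.0)

def forward(x, skewness, tailweight):
    return tf.sinh((tf.asinh(x) + skewness) * tailweight)

def fldj(x, skewness, tailweight):
    return tf.log(tf.cosh((tf.asinh(x) + skewness) * tailweight) / _sqrtx2p1(x)) + tf.log(tailweight)

def ildj(y, skewness, tailweight):
    return tf.log(tf.cosh(tf.asinh(y) / tailweight - skewness) / _sqrtx2p1(y)) - tf.log(tailweight)

x = tf.constant([-2.0, 0.5, 3.0])
skewness, tailweight = 0.3, 1.5
y = forward(x, skewness, tailweight)
residual = fldj(x, skewness, tailweight) + ildj(y, skewness, tailweight)  # should be ~0 elementwise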
def phi_of_z(self, rho, z): return 2e6 / 37. * (rho * 0.8 * tf.math.log(tf.cosh(z / 2.3e-01)) * 2.3e-01**2. + rho * 0.2 * z**2. / 2.)
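Written out, the expression this returns is (constants copied from the code; the snippet gives no units or physical context, so none are assumed here):

    phi(rho, z) = (2e6 / 37) * [ 0.8 * rho * z_h**2 * ln(cosh(z / z_h)) + 0.2 * rho * z**2 / 2 ],  with z_h = 0.23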
def __init__(self, input_shape, scope, args):
    assert len(input_shape) == 3
    self.input_shape = input_shape  # (W, H, Channels)
    self.scope = scope
    self.MASKS = args.masks
    self.Z_SIZE = args.z_size
    self.EPSILON = 1e-8
    self.checkpoint_path = args.checkpoint_path
    if not os.path.exists(self.checkpoint_path):
        os.makedirs(self.checkpoint_path)
    self.trained_epochs = tf.Variable(0, dtype=tf.int32, name='trained_epochs', trainable=False)
    self.inc_trained_epochs = self.trained_epochs.assign_add(1)

    ## Build net
    with tf.variable_scope('input'):
        self.x_in = tf.placeholder(name="x_in", dtype="float",
                                   shape=(None,) + self.input_shape)  # Batch, W, H, Channels
        self.z_in = tf.placeholder(name="z_in", dtype="float",
                                   shape=(None,) + (self.Z_SIZE,))  # Batch, Z
        self.mask = tf.placeholder(name="mask", dtype="float",
                                   shape=(None,) + self.input_shape[:-1] + (1,))  # Batch, W, H, 1
    with tf.variable_scope('is_training'):
        self.is_training = tf.placeholder(tf.bool, name="is_training")
    with tf.variable_scope('kl_tolerance'):
        self.kl_tolerance = tf.placeholder(name="kl_tolerance", dtype=tf.float32)

    # def build_VAE(x_in, mask, is_training, kl_tolerance, Z_SIZE):
    """
    x_in (tf.placeholder): input (and target output) of the autoencoder network
    mask (tf.placeholder): is_person mask. Where this mask is True the normal reconstruction_loss
        is computed; where it is False, the loss is set to 0.
    is_training (tf.placeholder): is training
    kl_tolerance (scalar, or tf.placeholder):
    Z_SIZE (scalar): size of the latent z dimension
    """
    is_training = self.is_training
    x = self.x_in
    _7 = 7 if input_shape[0] > 64 else 1  # either 1 or 7 (whether input is lidar or image)
    _3 = 3 if input_shape[0] > 64 else 1  # either 1 or 3
    _3_else_2 = 3 if input_shape[0] > 64 else 2

    with tf.variable_scope('encoder'):
        print("A0: {}".format(x.shape))
        x = tf.layers.batch_normalization(
            tf.nn.relu(U.conv2d(x, 64, "l1", [_7, 7], [_3, 3], pad="SAME", summary_tag="Conv/Layer1")),
            training=is_training)
        print("A1: {}".format(x.shape))
        x = tf.layers.max_pooling2d(x, (_3, 3), (_3, 3), padding="SAME", name="Conv/MaxPool")
        xres = x
        print("A2: {}".format(x.shape))
        x = tf.layers.batch_normalization(
            tf.nn.relu(U.conv2d(x, 64, "l2", [_3, 3], [1, 1], pad="SAME", summary_tag="Conv/Layer2")),
            training=is_training)
        print("A3: {}".format(x.shape))
        x = tf.layers.batch_normalization(
            U.conv2d(x, 64, "l3", [_3, 3], [1, 1], pad="SAME", summary_tag="Conv/Layer3"),
            training=is_training)
        print("A4: {}".format(x.shape))
        xres2 = x
        x = tf.nn.relu(x + xres)
        x = tf.layers.batch_normalization(
            tf.nn.relu(U.conv2d(x, 64, "l4", [_3_else_2, 3], [1, 1], pad="SAME", summary_tag="Conv/Layer4")),
            training=is_training)
        print("A5: {}".format(x.shape))
        x = tf.layers.batch_normalization(
            U.conv2d(x, 64, "l5", [_3_else_2, 3], [1, 1], pad="SAME", summary_tag="Conv/Layer5"),
            training=is_training)
        print("A6: {}".format(x.shape))
        x = tf.nn.relu(x + xres2)
        x = tf.layers.average_pooling2d(x, (_3, 3), (_3, 3), padding="SAME", name="Conv/AvgPool")
        endconv_shape = x.shape
        print("A7: {}".format(x.shape))
        x = U.flattenallbut0(x)
        endconv_flat_shape = x.shape
        print("A8: {}".format(x.shape))
        x = tf.nn.relu(tf.layers.dense(x, 512, name='lin', kernel_initializer=U.normc_initializer(1.0)))
        print("A9: {}".format(x.shape))
        tf.summary.histogram("encoder/lin/output", x)

    with tf.variable_scope('latent_space'):
        z_mu = tf.nn.sigmoid(tf.layers.dense(x, self.Z_SIZE, name='z_mu',
                                             kernel_initializer=U.normc_initializer(1.0)))
        z_logvar = tf.nn.relu(tf.layers.dense(x, self.Z_SIZE, name='z_logvar',
                                              kernel_initializer=U.normc_initializer(1.0)))
        z_sigma = tf.exp(z_logvar / 2.0)
        z = tf.contrib.distributions.Normal(loc=z_mu, scale=z_sigma)
        x = z.sample(1)[0]
        print("Z: {}".format(x.shape))
        self.z_mu = z_mu
        self.z_sigma = z_sigma
        self.z = z
        self.z_sample = x

    def build_decoder(z, is_training=self.is_training, output_shape=self.input_shape,
                      scopename="decoder", reuse=False):
        with tf.variable_scope(scopename, reuse=reuse) as scope:
            x = z
            x = tf.nn.relu(tf.layers.dense(x, 512, name='z_inv', kernel_initializer=U.normc_initializer(1.0)))
            print("A9: {}".format(x.shape))
            x = tf.nn.relu(tf.layers.dense(x, endconv_flat_shape[1], name='lin_inv',
                                           kernel_initializer=U.normc_initializer(1.0)))
            print("A8: {}".format(x.shape))
            x = tf.reshape(x, (-1, endconv_shape[1], endconv_shape[2], endconv_shape[3]))
            print("A7: {}".format(x.shape))
            # 'opposite' of average_pooling2d with stride
            # x = tf.image.resize_nearest_neighbor(x, (1*x.shape[1], 3*x.shape[2]), align_corners=True)
            x = tf.layers.conv2d_transpose(x, 64, (_3, 3), (_3, 3), activation=tf.nn.relu,
                                           padding="SAME", name="avgpool_inv")
            xres2 = x
            print("A6: {}".format(x.shape))
            x = tf.layers.batch_normalization(
                tf.layers.conv2d_transpose(x, 64, (_3_else_2, 3), (1, 1), activation=tf.nn.relu,
                                           padding="SAME", name="l5_inv"),
                training=is_training)
            print("A5: {}".format(x.shape))
            x = tf.layers.batch_normalization(
                tf.layers.conv2d_transpose(x, 64, (_3_else_2, 3), (1, 1), activation=tf.nn.relu,
                                           padding="SAME", name="l4_inv"),
                training=is_training)
            x = tf.nn.relu(x + xres2)
            xres = x
            print("A4: {}".format(x.shape))
            x = tf.layers.batch_normalization(
                tf.layers.conv2d_transpose(x, 64, (_3, 3), (1, 1), activation=tf.nn.relu,
                                           padding="SAME", name="l3_inv"),
                training=is_training)
            print("A3: {}".format(x.shape))
            x = tf.layers.batch_normalization(
                tf.layers.conv2d_transpose(x, 64, (_3, 3), (1, 1), activation=tf.nn.relu,
                                           padding="SAME", name="l2_inv"),
                training=is_training)
            print("A2: {}".format(x.shape))
            x = tf.nn.relu(x + xres)
            x = tf.layers.conv2d_transpose(x, 64, (_3, 3), (_3, 3), activation=tf.nn.relu,
                                           padding="SAME", name="maxpool_inv")
            print("A1: {}".format(x.shape))
            x = tf.layers.batch_normalization(
                tf.layers.conv2d_transpose(x, output_shape[2], (_7, 7), (_3, 3), activation=tf.nn.relu,
                                           padding="SAME", name="l1_inv"),
                training=is_training)
            print("A0: {}".format(x.shape))
            y = x
            return y

    self.y = build_decoder(self.z_sample)
    # This must be done before creating the pure decoder, or tf will expect z_in to be fed
    self.batch_norm_update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Create a separate decoder network with same variables fed by placeholder, not encoder
    # for off-training reconstruction
    self.reconstruction = build_decoder(self.z_in, reuse=True)

    # Losses
    with tf.variable_scope('reconstruction_loss'):
        self.avg_rec_abs_error = tf.reduce_mean(tf.abs(self.x_in - self.y),
                                                reduction_indices=[0, 1, 2])  # per channel
        # reconstruction_s_e = tf.square((self.x_in - self.y) / 255)  # squared normalized error
        reconstruction_s_e = tf.log(tf.cosh((self.x_in - self.y) / 255))  # log-cosh of normalized error
        if self.MASKS:
            # apply mask (W, H) to per-pixel error (Batch, W, H, Channels)
            reconstruction_s_e = tf.boolean_mask(reconstruction_s_e, self.mask)
        reconstruction_loss = tf.reduce_mean(reconstruction_s_e, reduction_indices=[1, 2, 3])  # per example
        self.reconstruction_loss = tf.reduce_mean(reconstruction_loss)  # average over batch

    # kl loss (reduce along z dimensions)
    kl_loss = -0.5 * tf.reduce_mean((1 + z_logvar - tf.square(z_mu) - tf.exp(z_logvar)),
                                    reduction_indices=1)
    kl_loss = tf.maximum(kl_loss, self.kl_tolerance)  # kl_loss per example
    self.kl_loss = tf.reduce_mean(kl_loss)  # batch kl_loss

    self.loss = self.reconstruction_loss + self.kl_loss

    # add tensorboard summaries
    for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        variable_summaries(var)
    self.merged_summaries = tf.summary.merge_all()
    # A placeholder for adding arbitrary images to tensorboard
    self.image_tensor = tf.placeholder(name="image", dtype="float",
                                       shape=(None, 1000, 1000, 4))  # Batch, W, H, Channels
    self.image_summary = tf.summary.image("Reconstructions/val", self.image_tensor)
    self.image_tensor2 = tf.placeholder(name="image2", dtype="float",
                                        shape=(None, 1000, 1000, 4))  # Batch, W, H, Channels
    self.image_summary2 = tf.summary.image("Reconstructions/valtarget", self.image_tensor2)
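The loss above combines a log-cosh reconstruction term with a KL term floored at kl_tolerance. A standalone sketch of just those two pieces, with illustrative function names and plain tensors instead of the class's placeholders:

import tensorflow as tf

def logcosh_reconstruction_loss(x_in, y, mask=None):
    # Per-pixel log-cosh error on 0-255 inputs scaled down by 255, as in the snippet.
    err = tf.log(tf.cosh((x_in - y) / 255.0))
    if mask is not None:
        # boolean_mask flattens the kept elements, so average them directly.
        return tf.reduce_mean(tf.boolean_mask(err, mask))
    per_example = tf.reduce_mean(err, axis=[1, 2, 3])
    return tf.reduce_mean(per_example)  # average over the batch

def kl_loss_with_floor(z_mu, z_logvar, kl_tolerance):
    # KL(q(z|x) || N(0, I)) averaged over z dimensions, floored per example at kl_tolerance.
    kl = -0.5 * tf.reduce_mean(1.0 + z_logvar - tf.square(z_mu) - tf.exp(z_logvar), axis=1)
    kl = tf.maximum(kl, kl_tolerance)
    return tf.reduce_mean(kl)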
def tfe_cosh(t): return tf.cosh(t)
def execute_cosh(self): return tf.cosh(self.a, name="cosh" + str(self.node_num))
def cosh(x): return tf.cosh(x)
def grad(dy):
    b_x = beta * x
    return dy * beta * (2 - b_x * tf.tanh(b_x * 0.5)) / (1 + tf.cosh(b_x))
def func(x): return tf.cosh(x)
def dtfsinh(y, x): d[x] = d[y] * tf.cosh(x)
def _tf_cosh(x): return tf.cosh(x)
    'sigmoid': Node('sigmoid', 1, lambda c, input: tf.sigmoid(c[0].build(input))),
    'swish': Node('swish', 1, lambda c, input: tf.nn.swish(c[0].build(input))),
    # Relus
    'relu': Node('relu', 1, lambda c, input: tf.nn.relu(c[0].build(input))),
    'relu6': Node('relu6', 1, lambda c, input: tf.nn.relu6(c[0].build(input))),
    # 'lrelu': Node('lrelu', 1, lambda c, input: tf.nn.leaky_relu(c[0].build(input))),
    'selu': Node('selu', 1, lambda c, input: tf.nn.selu(c[0].build(input))),
    'elu': Node('elu', 1, lambda c, input: tf.nn.elu(c[0].build(input))),
    'prelu': Node('prelu', 1, lambda c, input: prelu(input)),
    # Softies
    'softmax': Node('softmax', 1, lambda c, input: tf.nn.softmax(c[0].build(input))),
    # 'softplus': Node('softplus', 1, lambda c, input: tf.nn.softplus(c[0].build(input))),
    'softsign': Node('softsign', 1, lambda c, input: tf.nn.softsign(c[0].build(input))),
    # Hyperbolic
    'sinh': Node('sinh', 1, lambda c, input: tf.sinh(c[0].build(input))),
    # 'cosh': Node('cosh', 1, lambda c, input: tf.cosh(c[0].build(input))),
    # 'tanh': Node('tanh', 1, lambda c, input: tf.tanh(c[0].build(input))),
    # Input
    'x': Node('x', 0, lambda c, input: input)
}


def prelu(x):
    alpha = parse_node('l0')
    pos = tf.nn.relu(x)
    neg = alpha * (x - tf.abs(x)) * 0.5
    return pos + neg


class TreeActivationLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
def ttftanh(y, x):
    cx = tf.cosh(x)
    d[y] = d[x] / (cx * cx)
def logcosh(x): return tf.log(tf.cosh(x))
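tf.log(tf.cosh(x)) overflows for large |x| (cosh exceeds the float32 range once |x| is roughly 89), even though log(cosh(x)) itself is close to |x| - log 2 there. A numerically stable drop-in sketch, using the identity log cosh x = |x| + log(1 + exp(-2|x|)) - log 2:

import tensorflow as tf

def logcosh_stable(x):
    # Never evaluates cosh directly, so it stays finite for large |x|.
    ax = tf.abs(x)
    return ax + tf.log1p(tf.exp(-2.0 * ax)) - tf.log(2.0)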
def solve_SIR2COVID(R):
    log_out_path = R['FolderName']  # extract the output path from the dict R
    if not os.path.exists(log_out_path):  # check whether the path already exists
        os.mkdir(log_out_path)  # if log_out_path does not exist, create it
    log_fileout = open(os.path.join(log_out_path, 'log_train.txt'), 'w')  # create and open a writable log_train.txt under this path
    DNN_LogPrint.dictionary_out2file(R, log_fileout)
    log2trianSolus = open(os.path.join(log_out_path, 'train_Solus.txt'), 'w')  # writable train_Solus.txt under this path
    log2testSolus = open(os.path.join(log_out_path, 'test_Solus.txt'), 'w')  # writable test_Solus.txt under this path
    log2testSolus2 = open(os.path.join(log_out_path, 'test_Solus_temp.txt'), 'w')  # writable test_Solus_temp.txt under this path
    log2testParas = open(os.path.join(log_out_path, 'test_Paras.txt'), 'w')  # writable test_Paras.txt under this path

    trainSet_szie = R['size2train']
    batchSize_train = R['batch_size2train']
    batchSize_test = R['batch_size2test']
    pt_penalty_init = R['init_penalty2predict_true']  # Regularization parameter for difference of predict and true
    wb_penalty = R['regular_weight']  # Regularization parameter for weights
    lr_decay = R['lr_decay']
    learning_rate = R['learning_rate']
    act_func2SIR = R['act2sir']
    act_func2paras = R['act2paras']

    input_dim = R['input_dim']
    out_dim = R['output_dim']

    flag2S = 'WB2S'
    flag2I = 'WB2I'
    flag2R = 'WB2R'
    flag2beta = 'WB2beta'
    flag2gamma = 'WB2gamma'
    hidden_sir = R['hidden2SIR']
    hidden_para = R['hidden2para']

    Weight2S, Bias2S = DNN.init_DNN(size2in=input_dim, size2out=out_dim, hiddens=hidden_sir,
                                    scope=flag2S, opt2Init=R['SIR_opt2init_NN'])
    Weight2I, Bias2I = DNN.init_DNN(size2in=input_dim, size2out=out_dim, hiddens=hidden_sir,
                                    scope=flag2I, opt2Init=R['SIR_opt2init_NN'])
    Weight2R, Bias2R = DNN.init_DNN(size2in=input_dim, size2out=out_dim, hiddens=hidden_sir,
                                    scope=flag2R, opt2Init=R['SIR_opt2init_NN'])
    Weight2beta, Bias2beta = DNN.init_DNN(size2in=input_dim, size2out=out_dim, hiddens=hidden_para,
                                          scope=flag2beta, opt2Init=R['Para_opt2init_NN'])
    Weight2gamma, Bias2gamma = DNN.init_DNN(size2in=input_dim, size2out=out_dim, hiddens=hidden_para,
                                            scope=flag2gamma, opt2Init=R['Para_opt2init_NN'])

    global_steps = tf.Variable(0, trainable=False)
    with tf.device('/gpu:%s' % (R['gpuNo'])):
        with tf.variable_scope('vscope', reuse=tf.AUTO_REUSE):
            T_it = tf.placeholder(tf.float32, name='T_it', shape=[None, input_dim])
            I_observe = tf.placeholder(tf.float32, name='I_observe', shape=[None, input_dim])
            N_observe = tf.placeholder(tf.float32, name='N_observe', shape=[None, input_dim])
            predict_true_penalty = tf.placeholder_with_default(input=1e3, shape=[], name='bd_p')
            in_learning_rate = tf.placeholder_with_default(input=1e-5, shape=[], name='lr')
            train_opt = tf.placeholder_with_default(input=True, shape=[], name='train_opt')

            SNN_temp = DNN.PDE_DNN(x=T_it, hiddenLayer=hidden_sir, Weigths=Weight2S, Biases=Bias2S,
                                   DNNmodel=R['model2sir'], activation=act_func2SIR, freqs=R['freqs'])
            INN_temp = DNN.PDE_DNN(x=T_it, hiddenLayer=hidden_sir, Weigths=Weight2I, Biases=Bias2I,
                                   DNNmodel=R['model2sir'], activation=act_func2SIR, freqs=R['freqs'])
            RNN_temp = DNN.PDE_DNN(x=T_it, hiddenLayer=hidden_sir, Weigths=Weight2R, Biases=Bias2R,
                                   DNNmodel=R['model2sir'], activation=act_func2SIR, freqs=R['freqs'])
            in_beta = DNN.PDE_DNN(x=T_it, hiddenLayer=hidden_para, Weigths=Weight2beta, Biases=Bias2beta,
                                  DNNmodel=R['model2paras'], activation=act_func2paras, freqs=R['freqs'])
            in_gamma = DNN.PDE_DNN(x=T_it, hiddenLayer=hidden_para, Weigths=Weight2gamma, Biases=Bias2gamma,
                                   DNNmodel=R['model2paras'], activation=act_func2paras, freqs=R['freqs'])

            # Remark: beta, gamma, S_NN, I_NN, R_NN should all be positive. beta lies in 1--15 and
            # gamma in (0, 1); with the normalization, S_NN, I_NN, R_NN all lie in [0, 1).
            if (R['total_population'] == R['scale_population']) and R['scale_population'] != 1:
                beta = tf.square(in_beta)
                gamma = tf.nn.sigmoid(in_gamma)
                # SNN = SNN_temp
                # INN = INN_temp
                # RNN = RNN_temp

                # SNN = tf.nn.relu(SNN_temp)
                # INN = tf.nn.relu(INN_temp)
                # RNN = tf.nn.relu(RNN_temp)

                # SNN = tf.abs(SNN_temp)
                # INN = tf.abs(INN_temp)
                # RNN = tf.abs(RNN_temp)

                # SNN = DNN_base.gauss(SNN_temp)
                # INN = tf.square(INN_temp)
                # RNN = tf.square(RNN_temp)

                # SNN = DNN_base.gauss(SNN_temp)
                # INN = tf.square(INN_temp)
                # RNN = tf.nn.sigmoid(RNN_temp)

                # SNN = DNN_base.gauss(SNN_temp)
                # INN = tf.nn.sigmoid(INN_temp)
                # RNN = tf.square(RNN_temp)

                # SNN = tf.sqrt(tf.square(SNN_temp))
                # INN = tf.sqrt(tf.square(INN_temp))
                # RNN = tf.sqrt(tf.square(RNN_temp))

                SNN = tf.nn.sigmoid(SNN_temp)
                INN = tf.nn.sigmoid(INN_temp)
                RNN = tf.nn.sigmoid(RNN_temp)

                # SNN = tf.tanh(SNN_temp)
                # INN = tf.tanh(INN_temp)
                # RNN = tf.tanh(RNN_temp)
            else:
                beta = tf.square(in_beta)
                gamma = tf.nn.sigmoid(in_gamma)
                # SNN = SNN_temp
                # INN = INN_temp
                # RNN = RNN_temp

                # SNN = tf.nn.relu(SNN_temp)
                # INN = tf.nn.relu(INN_temp)
                # RNN = tf.nn.relu(RNN_temp)

                SNN = tf.nn.sigmoid(SNN_temp)
                INN = tf.nn.sigmoid(INN_temp)
                RNN = tf.nn.sigmoid(RNN_temp)

                # SNN = tf.tanh(SNN_temp)
                # INN = tf.tanh(INN_temp)
                # RNN = tf.tanh(RNN_temp)

            N_NN = SNN + INN + RNN

            dSNN2t = tf.gradients(SNN, T_it)[0]
            dINN2t = tf.gradients(INN, T_it)[0]
            dRNN2t = tf.gradients(RNN, T_it)[0]
            dN_NN2t = tf.gradients(N_NN, T_it)[0]

            temp_snn2t = -beta * SNN * INN
            temp_inn2t = beta * SNN * INN - gamma * INN
            temp_rnn2t = gamma * INN

            if str.lower(R['loss_function']) == 'l2_loss' and R['scale_up'] == 0:
                # LossS_Net_obs = tf.reduce_mean(tf.square(SNN - S_observe))
                LossI_Net_obs = tf.reduce_mean(tf.square(INN - I_observe))
                # LossR_Net_obs = tf.reduce_mean(tf.square(RNN - R_observe))
                LossN_Net_obs = tf.reduce_mean(tf.square(N_NN - N_observe))

                Loss2dS = tf.reduce_mean(tf.square(dSNN2t - temp_snn2t))
                Loss2dI = tf.reduce_mean(tf.square(dINN2t - temp_inn2t))
                Loss2dR = tf.reduce_mean(tf.square(dRNN2t - temp_rnn2t))
                Loss2dN = tf.reduce_mean(tf.square(dN_NN2t))
            elif str.lower(R['loss_function']) == 'l2_loss' and R['scale_up'] == 1:
                scale_up = R['scale_factor']
                # LossS_Net_obs = tf.reduce_mean(tf.square(scale_up*SNN - scale_up*S_observe))
                LossI_Net_obs = tf.reduce_mean(tf.square(scale_up * INN - scale_up * I_observe))
                # LossR_Net_obs = tf.reduce_mean(tf.square(scale_up*RNN - scale_up*R_observe))
                LossN_Net_obs = tf.reduce_mean(tf.square(scale_up * N_NN - scale_up * N_observe))

                Loss2dS = tf.reduce_mean(tf.square(scale_up * dSNN2t - scale_up * temp_snn2t))
                Loss2dI = tf.reduce_mean(tf.square(scale_up * dINN2t - scale_up * temp_inn2t))
                Loss2dR = tf.reduce_mean(tf.square(scale_up * dRNN2t - scale_up * temp_rnn2t))
                Loss2dN = tf.reduce_mean(tf.square(scale_up * dN_NN2t))
            elif str.lower(R['loss_function']) == 'lncosh_loss':
                # LossS_Net_obs = tf.reduce_mean(tf.log(tf.cosh(SNN - S_observe)))
                LossI_Net_obs = tf.reduce_mean(tf.log(tf.cosh(INN - I_observe)))
                # LossR_Net_obs = tf.reduce_mean(tf.log(tf.cosh(RNN - R_observe)))
                LossN_Net_obs = tf.reduce_mean(tf.log(tf.cosh(N_NN - N_observe)))

                Loss2dS = tf.reduce_mean(tf.log(tf.cosh(dSNN2t - temp_snn2t)))
                Loss2dI = tf.reduce_mean(tf.log(tf.cosh(dINN2t - temp_inn2t)))
                Loss2dR = tf.reduce_mean(tf.log(tf.cosh(dRNN2t - temp_rnn2t)))
                Loss2dN = tf.reduce_mean(tf.log(tf.cosh(dN_NN2t)))

            if R['regular_weight_model'] == 'L1':
                regular_WB2S = DNN_base.regular_weights_biases_L1(Weight2S, Bias2S)
                regular_WB2I = DNN_base.regular_weights_biases_L1(Weight2I, Bias2I)
                regular_WB2R = DNN_base.regular_weights_biases_L1(Weight2R, Bias2R)
                regular_WB2Beta = DNN_base.regular_weights_biases_L1(Weight2beta, Bias2beta)
                regular_WB2Gamma = DNN_base.regular_weights_biases_L1(Weight2gamma, Bias2gamma)
            elif R['regular_weight_model'] == 'L2':
                regular_WB2S = DNN_base.regular_weights_biases_L2(Weight2S, Bias2S)
                regular_WB2I = DNN_base.regular_weights_biases_L2(Weight2I, Bias2I)
                regular_WB2R = DNN_base.regular_weights_biases_L2(Weight2R, Bias2R)
                regular_WB2Beta = DNN_base.regular_weights_biases_L2(Weight2beta, Bias2beta)
                regular_WB2Gamma = DNN_base.regular_weights_biases_L2(Weight2gamma, Bias2gamma)
            else:
                regular_WB2S = tf.constant(0.0)
                regular_WB2I = tf.constant(0.0)
                regular_WB2R = tf.constant(0.0)
                regular_WB2Beta = tf.constant(0.0)
                regular_WB2Gamma = tf.constant(0.0)

            PWB2S = wb_penalty * regular_WB2S
            PWB2I = wb_penalty * regular_WB2I
            PWB2R = wb_penalty * regular_WB2R
            PWB2Beta = wb_penalty * regular_WB2Beta
            PWB2Gamma = wb_penalty * regular_WB2Gamma

            Loss2S = Loss2dS + PWB2S
            Loss2I = predict_true_penalty * LossI_Net_obs + Loss2dI + PWB2I
            Loss2R = Loss2dR + PWB2R
            Loss2N = predict_true_penalty * LossN_Net_obs + Loss2dN
            Loss = Loss2S + Loss2I + Loss2R + Loss2N + PWB2Beta + PWB2Gamma

            my_optimizer = tf.train.AdamOptimizer(in_learning_rate)
            if R['train_model'] == 'train_group':
                train_Loss2S = my_optimizer.minimize(Loss2S, global_step=global_steps)
                train_Loss2I = my_optimizer.minimize(Loss2I, global_step=global_steps)
                train_Loss2R = my_optimizer.minimize(Loss2R, global_step=global_steps)
                train_Loss2N = my_optimizer.minimize(Loss2N, global_step=global_steps)
                train_Loss = my_optimizer.minimize(Loss, global_step=global_steps)
                train_Losses = tf.group(train_Loss2S, train_Loss2I, train_Loss2R, train_Loss2N, train_Loss)
            elif R['train_model'] == 'train_union_loss':
                train_Losses = my_optimizer.minimize(Loss, global_step=global_steps)

    t0 = time.time()
    loss_s_all, loss_i_all, loss_r_all, loss_n_all, loss_all = [], [], [], [], []
    test_epoch = []
    test_mse2I_all, test_rel2I_all = [], []

    # filename = 'data2csv/Wuhan.csv'
    # filename = 'data2csv/Italia_data.csv'
    filename = 'data2csv/Korea_data.csv'
    date, data = DNN_data.load_csvData(filename)

    assert (trainSet_szie + batchSize_test <= len(data))
    train_date, train_data2i, test_date, test_data2i = \
        DNN_data.split_csvData2train_test(date, data, size2train=trainSet_szie,
                                          normalFactor=R['scale_population'])

    if R['scale_population'] == 1:
        nbatch2train = np.ones(batchSize_train, dtype=np.float32) * float(R['total_population'])
    elif (R['total_population'] != R['scale_population']) and R['scale_population'] != 1:
        nbatch2train = np.ones(batchSize_train, dtype=np.float32) * (
            float(R['total_population']) / float(R['scale_population']))
    elif (R['total_population'] == R['scale_population']) and R['scale_population'] != 1:
        nbatch2train = np.ones(batchSize_train, dtype=np.float32)

    # For time-series data, the model should be validated on consecutive time points.
    test_t_bach = DNN_data.sample_testDays_serially(test_date, batchSize_test)
    i_obs_test = DNN_data.sample_testData_serially(test_data2i, batchSize_test, normalFactor=1.0)
    print('The test data about i:\n', str(np.transpose(i_obs_test)))
    print('\n')
    DNN_tools.log_string('The test data about i:\n%s\n' % str(np.transpose(i_obs_test)), log_fileout)

    # With allow_soft_placement=True in ConfigProto, the GPU can be used.
    config = tf.ConfigProto(allow_soft_placement=True)  # configure the session when it is created
    config.gpu_options.allow_growth = True  # let TensorFlow request GPU memory on demand instead of reserving too much
    config.allow_soft_placement = True  # if the requested device does not exist, fall back to an available one (e.g. CPU when there is no GPU)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        tmp_lr = learning_rate

        for i_epoch in range(R['max_epoch'] + 1):
            t_batch, i_obs = \
                DNN_data.randSample_Normalize_existData(train_date, train_data2i, batchsize=batchSize_train,
                                                        normalFactor=1.0, sampling_opt=R['opt2sample'])
            n_obs = nbatch2train.reshape(batchSize_train, 1)
            tmp_lr = tmp_lr * (1 - lr_decay)
            train_option = True
            if R['activate_stage_penalty'] == 1:
                if i_epoch < int(R['max_epoch'] / 10):
                    temp_penalty_pt = pt_penalty_init
                elif i_epoch < int(R['max_epoch'] / 5):
                    temp_penalty_pt = 10 * pt_penalty_init
                elif i_epoch < int(R['max_epoch'] / 4):
                    temp_penalty_pt = 50 * pt_penalty_init
                elif i_epoch < int(R['max_epoch'] / 2):
                    temp_penalty_pt = 100 * pt_penalty_init
                elif i_epoch < int(3 * R['max_epoch'] / 4):
                    temp_penalty_pt = 200 * pt_penalty_init
                else:
                    temp_penalty_pt = 500 * pt_penalty_init
            elif R['activate_stage_penalty'] == 2:
                if i_epoch < int(R['max_epoch'] / 3):
                    temp_penalty_pt = pt_penalty_init
                elif i_epoch < 2 * int(R['max_epoch'] / 3):
                    temp_penalty_pt = 10 * pt_penalty_init
                else:
                    temp_penalty_pt = 50 * pt_penalty_init
            else:
                temp_penalty_pt = pt_penalty_init

            _, loss_s, loss_i, loss_r, loss_n, loss, pwb2s, pwb2i, pwb2r = sess.run(
                [train_Losses, Loss2S, Loss2I, Loss2R, Loss2N, Loss, PWB2S, PWB2I, PWB2R],
                feed_dict={T_it: t_batch, I_observe: i_obs, N_observe: n_obs, in_learning_rate: tmp_lr,
                           train_opt: train_option, predict_true_penalty: temp_penalty_pt})

            loss_s_all.append(loss_s)
            loss_i_all.append(loss_i)
            loss_r_all.append(loss_r)
            loss_n_all.append(loss_n)
            loss_all.append(loss)

            if i_epoch % 1000 == 0:
                # Log the training results of S_NN, I_NN, R_NN, beta, gamma during training.
                DNN_LogPrint.print_and_log2train(i_epoch, time.time() - t0, tmp_lr, temp_penalty_pt,
                                                 pwb2s, pwb2i, pwb2r, loss_s, loss_i, loss_r, loss_n,
                                                 loss, log_out=log_fileout)
                s_nn2train, i_nn2train, r_nn2train = sess.run(
                    [SNN, INN, RNN], feed_dict={T_it: np.reshape(train_date, [-1, 1])})

                # Log the test results of S_NN, I_NN, R_NN, beta, gamma during training.
                test_epoch.append(i_epoch / 1000)
                train_option = False
                s_nn2test, i_nn2test, r_nn2test, beta_test, gamma_test = sess.run(
                    [SNN, INN, RNN, beta, gamma],
                    feed_dict={T_it: test_t_bach, train_opt: train_option})
                point_ERR2I = np.square(i_nn2test - i_obs_test)
                test_mse2I = np.mean(point_ERR2I)
                test_mse2I_all.append(test_mse2I)
                test_rel2I = test_mse2I / np.mean(np.square(i_obs_test))
                test_rel2I_all.append(test_rel2I)

                DNN_tools.print_and_log_test_one_epoch(test_mse2I, test_rel2I, log_out=log_fileout)
                DNN_tools.log_string('------------------The epoch----------------------: %s\n' % str(i_epoch),
                                     log2testSolus)
                DNN_tools.log_string('The test result for s:\n%s\n' % str(np.transpose(s_nn2test)), log2testSolus)
                DNN_tools.log_string('The test result for i:\n%s\n' % str(np.transpose(i_nn2test)), log2testSolus)
                DNN_tools.log_string('The test result for r:\n%s\n\n' % str(np.transpose(r_nn2test)), log2testSolus)

                # Log the test results of S_NN_temp, I_NN_temp, R_NN_temp, in_beta, in_gamma during training.
                s_nn_temp2test, i_nn_temp2test, r_nn_temp2test, in_beta_test, in_gamma_test = sess.run(
                    [SNN_temp, INN_temp, RNN_temp, in_beta, in_gamma],
                    feed_dict={T_it: test_t_bach, train_opt: train_option})
                DNN_tools.log_string('------------------The epoch----------------------: %s\n' % str(i_epoch),
                                     log2testSolus2)
                DNN_tools.log_string('The test result for s_temp:\n%s\n' % str(np.transpose(s_nn_temp2test)),
                                     log2testSolus2)
                DNN_tools.log_string('The test result for i_temp:\n%s\n' % str(np.transpose(i_nn_temp2test)),
                                     log2testSolus2)
                DNN_tools.log_string('The test result for r_temp:\n%s\n\n' % str(np.transpose(r_nn_temp2test)),
                                     log2testSolus2)

                DNN_tools.log_string('------------------The epoch----------------------: %s\n' % str(i_epoch),
                                     log2testParas)
                DNN_tools.log_string('The test result for in_beta:\n%s\n' % str(np.transpose(in_beta_test)),
                                     log2testParas)
                DNN_tools.log_string('The test result for in_gamma:\n%s\n' % str(np.transpose(in_gamma_test)),
                                     log2testParas)

        DNN_tools.log_string('The train result for S:\n%s\n' % str(np.transpose(s_nn2train)), log2trianSolus)
        DNN_tools.log_string('The train result for I:\n%s\n' % str(np.transpose(i_nn2train)), log2trianSolus)
        DNN_tools.log_string('The train result for R:\n%s\n\n' % str(np.transpose(r_nn2train)), log2trianSolus)

        saveData.true_value2convid(train_data2i, name2Array='itrue2train', outPath=R['FolderName'])
        saveData.save_Solu2mat_Covid(s_nn2train, name2solus='s2train', outPath=R['FolderName'])
        saveData.save_Solu2mat_Covid(i_nn2train, name2solus='i2train', outPath=R['FolderName'])
        saveData.save_Solu2mat_Covid(r_nn2train, name2solus='r2train', outPath=R['FolderName'])

        saveData.save_SIR_trainLoss2mat_Covid(loss_s_all, loss_i_all, loss_r_all, loss_n_all,
                                              actName=act_func2SIR, outPath=R['FolderName'])

        plotData.plotTrain_loss_1act_func(loss_s_all, lossType='loss2s', seedNo=R['seed'],
                                          outPath=R['FolderName'], yaxis_scale=True)
        plotData.plotTrain_loss_1act_func(loss_i_all, lossType='loss2i', seedNo=R['seed'],
                                          outPath=R['FolderName'], yaxis_scale=True)
        plotData.plotTrain_loss_1act_func(loss_r_all, lossType='loss2r', seedNo=R['seed'],
                                          outPath=R['FolderName'], yaxis_scale=True)
        plotData.plotTrain_loss_1act_func(loss_n_all, lossType='loss2n', seedNo=R['seed'],
                                          outPath=R['FolderName'], yaxis_scale=True)

        saveData.true_value2convid(i_obs_test, name2Array='i_true2test', outPath=R['FolderName'])
        saveData.save_testMSE_REL2mat(test_mse2I_all, test_rel2I_all, actName='Infected', outPath=R['FolderName'])
        plotData.plotTest_MSE_REL(test_mse2I_all, test_rel2I_all, test_epoch, actName='Infected',
                                  seedNo=R['seed'], outPath=R['FolderName'], yaxis_scale=True)

        saveData.save_SIR_testSolus2mat_Covid(s_nn2test, i_nn2test, r_nn2test, name2solus1='snn2test',
                                              name2solus2='inn2test', name2solus3='rnn2test',
                                              outPath=R['FolderName'])
        saveData.save_SIR_testParas2mat_Covid(beta_test, gamma_test, name2para1='beta2test',
                                              name2para2='gamma2test', outPath=R['FolderName'])

        plotData.plot_testSolu2convid(i_obs_test, name2solu='i_true', coord_points2test=test_t_bach,
                                      outPath=R['FolderName'])
        plotData.plot_testSolu2convid(s_nn2test, name2solu='s_test', coord_points2test=test_t_bach,
                                      outPath=R['FolderName'])
        plotData.plot_testSolu2convid(i_nn2test, name2solu='i_test', coord_points2test=test_t_bach,
                                      outPath=R['FolderName'])
        plotData.plot_testSolu2convid(r_nn2test, name2solu='r_test', coord_points2test=test_t_bach,
                                      outPath=R['FolderName'])

        plotData.plot_testSolus2convid(i_obs_test, i_nn2test, name2solu1='i_true', name2solu2='i_test',
                                       coord_points2test=test_t_bach, seedNo=R['seed'], outPath=R['FolderName'])

        plotData.plot_testSolu2convid(beta_test, name2solu='beta_test', coord_points2test=test_t_bach,
                                      outPath=R['FolderName'])
        plotData.plot_testSolu2convid(gamma_test, name2solu='gamma_test', coord_points2test=test_t_bach,
                                      outPath=R['FolderName'])
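The Loss2dS/Loss2dI/Loss2dR terms above penalize the residuals of the SIR system dS/dt = -beta*S*I, dI/dt = beta*S*I - gamma*I, dR/dt = gamma*I. A minimal sketch of those residuals for network outputs S, I, R, beta, gamma that depend on a time tensor t (function and variable names here are illustrative, not from the code above):

import tensorflow as tf

def sir_residuals(S, I, R, beta, gamma, t):
    # Residuals of dS/dt = -beta*S*I, dI/dt = beta*S*I - gamma*I, dR/dt = gamma*I,
    # with the time derivatives taken by automatic differentiation.
    dS_dt = tf.gradients(S, t)[0]
    dI_dt = tf.gradients(I, t)[0]
    dR_dt = tf.gradients(R, t)[0]
    res_S = dS_dt - (-beta * S * I)
    res_I = dI_dt - (beta * S * I - gamma * I)
    res_R = dR_dt - (gamma * I)
    return res_S, res_I, res_R

# Each residual is then penalized with either an L2 or an ln-cosh loss, e.g.:
# loss_dI = tf.reduce_mean(tf.log(tf.cosh(res_I)))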
def f(x): return tf.cosh(x)
def eval(self, x): return tf.cosh(x)
def ttfsinh(y, x): d[y] = d[x] * tf.cosh(x)
def eXclusiveConvolutionalAutoencoder( input_shape=[None, 28, 28, 1], layers=[ { 'n_channels': 144, 'reconstructive_regularizer': 1.0, 'weight_decay': 1.0, 'sparse_regularizer': 1.0, 'sparsity_level': 0.05, 'exclusive_regularizer': 1.0, 'exclusive_scale': 1.0, 'tied_weight': True, 'conv_size': 8, 'conv_stride': 1, 'conv_padding': 'VALID', # 'pool_size': 0, # 'pool_stride': 0, # 'pool_padding': 'VALID', 'corrupt_prob': 0.5, 'exclusive_type': 'logcosh', 'exclusive_scale': 1.0, # 'gaussian_mean': 0.0, # 'gaussian_std': 0.0, 'encode': 'sigmoid', 'decode': 'linear', 'pathways': [ range(0, 72), range(0, 144), ], }, ], init_encoder_weight=None, init_decoder_weight=None, init_encoder_bias=None, init_decoder_bias=None, ): '''Build a deep denoising autoencoder w/ tied weights. Parameters ---------- input_shape : list, optional Description n_channels : list, optional Description filter_sizes : list, optional Description Returns ------- x : Tensor Input placeholder to the network z : Tensor Inner-most latent representation y : Tensor Output reconstruction of the input cost : Tensor Overall cost to use for training Raises ------ ValueError Description ''' # %% n_channels = [input_shape[3]] for layer in layers: n_channels.append(layer['n_channels']) assert len(layer['pathways']) == len( layers[0] ['pathways']), 'Ambiguous pathway definitions over layers.' # %% input to the network training_x = [] training_x_tensor = [] for pathway_i in range(len(layers[0]['pathways'])): # ensure 2-d is converted to square tensor. x = tf.placeholder(tf.float32, input_shape, name='training_x' + str(pathway_i)) training_x.append(x) if len(x.get_shape()) == 2: x_dim = np.sqrt(x.get_shape().as_list()[1]) if x_dim != int(x_dim): raise ValueError('Unsupported input dimensions') x_dim = int(x_dim) x_tensor = tf.reshape(x, [-1, x_dim, x_dim, n_channels[0]]) elif len(x.get_shape()) == 4: x_tensor = x else: raise ValueError('Unsupported input dimensions') training_x_tensor.append(x_tensor) # input to the network x = tf.placeholder(tf.float32, input_shape, name='x') # %% # ensure 2-d is converted to square tensor. 
if len(x.get_shape()) == 2: x_dim = np.sqrt(x.get_shape().as_list()[1]) if x_dim != int(x_dim): raise ValueError('Unsupported input dimensions') x_dim = int(x_dim) x_tensor = tf.reshape(x, [-1, x_dim, x_dim, n_channels[0]]) elif len(x.get_shape()) == 4: x_tensor = x else: raise ValueError('Unsupported input dimensions') # current_input = x current_input = x_tensor training_current_input = [] for pathway_i in range(len(layers[0]['pathways'])): training_current_input.append(training_x_tensor[pathway_i]) # %% # Build the encoder encoder_weight = [] encoder_bias = [] training_shape_list = [] shape_list = [] training_encoder_output_list = [] training_encoder_input_list = [] layerwise_z = [] training_argmax_list = [] argmax_list = [] for layer_i, (n_input, n_output) in enumerate(zip(n_channels[:-1], n_channels[1:])): training_shape_list.append([]) shape_list.append(current_input.get_shape().as_list()) if init_encoder_weight != None: W = tf.Variable(tf.constant(init_encoder_weight[layer_i])) else: W = tf.Variable( tf.random_uniform([ layers[layer_i]['conv_size'], layers[layer_i]['conv_size'], n_input, n_output ], -1.0 / math.sqrt(n_input), 1.0 / math.sqrt(n_input))) if init_encoder_bias != None: b = tf.Variable(tf.constant(init_encoder_bias[layer_i])) else: b = tf.Variable(tf.zeros([n_output])) encoder_weight.append(W) encoder_bias.append(b) if 'pool_size' in layers[layer_i] and 'pool_stride' in layers[ layer_i] and 'pool_padding' in layers[layer_i]: training_argmax_list.append([]) training_encoder_input_list.append([]) training_encoder_output = [] for pathway_i in range(len(layers[layer_i]['pathways'])): training_encoder_input_list[-1].append( corrupt(training_current_input[pathway_i]) * layers[layer_i]['corrupt_prob'] + training_current_input[pathway_i] * (1 - layers[layer_i]['corrupt_prob']) if layers[layer_i]['corrupt_prob'] != None else training_current_input[pathway_i]) training_shape_list[-1].append( training_current_input[pathway_i]._shape_as_list()) a = activate_function( tf.add( tf.nn.conv2d( training_current_input[pathway_i], W, strides=[ 1, layers[layer_i]['conv_stride'], layers[layer_i]['conv_stride'], 1 ], padding=layers[layer_i]['conv_padding'], ), b), layers[layer_i]['encode']) if 'pool_size' in layers[layer_i] and 'pool_stride' in layers[ layer_i] and 'pool_padding' in layers[layer_i]: a, argmax = max_pool_with_argmax( a, ksize=[ 1, layers[layer_i]['pool_size'], layers[layer_i]['pool_size'], 1 ], strides=[ 1, layers[layer_i]['pool_stride'], layers[layer_i]['pool_stride'], 1 ], padding=layers[layer_i]['pool_padding']) training_argmax_list[-1].append(argmax) training_encoder_output.append(a) training_encoder_output_list.append(training_encoder_output) training_current_input = training_encoder_output output = activate_function( tf.add( tf.nn.conv2d(current_input, W, strides=[ 1, layers[layer_i]['conv_stride'], layers[layer_i]['conv_stride'], 1 ], padding=layers[layer_i]['conv_padding']), b), layers[layer_i]['encode']) if 'pool_size' in layers[layer_i] and 'pool_stride' in layers[ layer_i] and 'pool_padding' in layers[layer_i]: output, argmax = max_pool_with_argmax( output, ksize=[ 1, layers[layer_i]['pool_size'], layers[layer_i]['pool_size'], 1 ], strides=[ 1, layers[layer_i]['pool_stride'], layers[layer_i]['pool_stride'], 1 ], padding=layers[layer_i]['pool_padding']) argmax_list.append(argmax) layerwise_z.append(output) current_input = output # %% latent representation training_z = training_encoder_output z = current_input decoder_weight = [] decoder_bias = [] 
training_decoder_output_list = [] # layerwise_training_decoder_input_list = [] layerwise_training_decoder_output_list = [] layerwise_y = [] # %% Build the decoder using the same weights for layer_i, (n_input, n_output, training_shape, shape) in enumerate( zip(n_channels[::-1][:-1], n_channels[::-1][1:], training_shape_list[::-1], shape_list[::-1])): if init_decoder_weight != None: W = tf.Variable(tf.constant(init_decoder_weight[::-1][layer_i])) else: if layers[layer_i]['tied_weight'] == True: W = encoder_weight[::-1][layer_i] else: W = tf.Variable( tf.random_uniform([ layers[::-1][layer_i]['conv_size'], layers[::-1][layer_i]['conv_size'], n_output, n_input ], -1.0 / math.sqrt(n_input), 1.0 / math.sqrt(n_input))) if init_decoder_bias != None: b = tf.Variable(tf.constant(init_decoder_bias[::-1][layer_i])) else: b = tf.Variable(tf.zeros([n_output])) decoder_weight.append(W) decoder_bias.append(b) training_decoder_output = [] # layerwise_training_decoder_input_list.append([]) layerwise_training_decoder_output = [] for pathway_i in range(len(layers[::-1][layer_i]['pathways'])): # layerwise_training_decoder_input_list[-1].append(tf.identity(training_encoder_output_list[::-1][layer_i][pathway_i])) training_current_input_pathway_i = training_current_input[ pathway_i] if 'pool_size' in layers[layer_i] and 'pool_stride' in layers[ layer_i] and 'pool_padding' in layers[layer_i]: training_current_input_pathway_i = unpool_with_argmax( training_current_input_pathway_i, training_argmax_list[::-1][layer_i][pathway_i], ksize=[ 1, layers[::-1][layer_i]['pool_size'], layers[::-1][layer_i]['pool_size'], 1 ]) a = activate_function( tf.add( tf.nn.conv2d_transpose( tf.gather(training_current_input_pathway_i, layers[::-1][layer_i]['pathways'][pathway_i], axis=3), # training_current_input[pathway_i], tf.gather(W, layers[::-1][layer_i]['pathways'][pathway_i], axis=3), # W, tf.stack([ tf.shape(training_current_input[pathway_i])[0], training_shape[pathway_i][1], training_shape[pathway_i][2], n_output, ]), strides=[ 1, layers[::-1][layer_i]['conv_stride'], layers[::-1][layer_i]['conv_stride'], 1 ], padding=layers[::-1][layer_i]['conv_padding'], ), b), layers[::-1][layer_i]['decode']) training_decoder_output.append(a) training_encoder_output_list_rev_layer_i_pathway_i = training_encoder_output_list[::-1][ layer_i][pathway_i] if 'pool_size' in layers[layer_i] and 'pool_stride' in layers[ layer_i] and 'pool_padding' in layers[layer_i]: # layerwise_training_decoder_input_list[-1][-1] = unpool_with_argmax( training_encoder_output_list_rev_layer_i_pathway_i = unpool_with_argmax( # layerwise_training_decoder_input_list[-1][-1], training_encoder_output_list_rev_layer_i_pathway_i, training_argmax_list[::-1][layer_i][pathway_i], ksize=[ 1, layers[::-1][layer_i]['pool_size'], layers[::-1][layer_i]['pool_size'], 1 ]) layerwise_a = activate_function( tf.add( tf.nn.conv2d_transpose( # tf.gather(layerwise_training_decoder_input_list[-1][-1], layers[::-1][layer_i]['pathways'][pathway_i], axis=3), tf.gather( training_encoder_output_list_rev_layer_i_pathway_i, layers[::-1][layer_i]['pathways'][pathway_i], axis=3), # layerwise_training_decoder_input_list[-1][-1], tf.gather(W, layers[::-1][layer_i]['pathways'][pathway_i], axis=3), # W, tf.stack([ tf.shape(training_encoder_output_list[::-1] [layer_i][pathway_i])[0], training_shape[pathway_i][1], training_shape[pathway_i][2], n_output, ]), strides=[ 1, layers[::-1][layer_i]['conv_stride'], layers[::-1][layer_i]['conv_stride'], 1 ], padding=layers[::-1][layer_i]['conv_padding'], ), b), 
layers[::-1][layer_i]['decode']) layerwise_training_decoder_output.append(layerwise_a) training_decoder_output_list.append(training_decoder_output) layerwise_training_decoder_output_list.append( layerwise_training_decoder_output) training_current_input = training_decoder_output if 'pool_size' in layers[layer_i] and 'pool_stride' in layers[ layer_i] and 'pool_padding' in layers[layer_i]: current_input = unpool_with_argmax( current_input, argmax_list[::-1][layer_i], ksize=[ 1, layers[::-1][layer_i]['pool_size'], layers[::-1][layer_i]['pool_size'], 1 ]) output = activate_function( tf.add( tf.nn.conv2d_transpose( current_input, W, tf.stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]), strides=[ 1, layers[::-1][layer_i]['conv_stride'], layers[::-1][layer_i]['conv_stride'], 1 ], padding=layers[::-1][layer_i]['conv_padding']), b), layers[::-1][layer_i]['decode']) current_input = output for layer_i in range(len(layers)): layerwise_input = layerwise_z[::-1][layer_i] for layer_j in range(len(layers) - layer_i)[::-1]: if 'pool_size' in layers[layer_j] and 'pool_stride' in layers[ layer_j] and 'pool_padding' in layers[layer_j]: layerwise_input = unpool_with_argmax( layerwise_input, argmax_list[layer_j], ksize=[ 1, layers[layer_j]['pool_size'], layers[layer_j]['pool_size'], 1 ]) layerwise_output = activate_function( tf.add( tf.nn.conv2d_transpose( layerwise_input, decoder_weight[::-1][layer_j], tf.stack([ tf.shape(x)[0], shape_list[layer_j][1], shape_list[layer_j][2], shape_list[layer_j][3] ]), strides=[ 1, layers[layer_j]['conv_stride'], layers[layer_j]['conv_stride'], 1 ], padding=layers[layer_j]['conv_padding']), decoder_bias[::-1][layer_j]), layers[layer_j]['decode']) layerwise_input = layerwise_output layerwise_y.append(layerwise_output) decoder_weight.reverse() decoder_bias.reverse() training_decoder_output_list.reverse() layerwise_training_decoder_output_list.reverse() layerwise_y.reverse() # %% now have the reconstruction through the network training_y = training_current_input y = current_input # cost function measures pixel-wise difference cost = {} cost['reconstruction_error'] = tf.constant(0.0) # for layer_i in range(len(layers)): for pathway_i in range(len(layers[0]['pathways'])): if training_encoder_input_list[0][ pathway_i] != None and training_decoder_output_list[0][ pathway_i] != None: cost['reconstruction_error'] = tf.add( cost['reconstruction_error'], layers[0]['reconstructive_regularizer'] * 0.5 * tf.reduce_mean( tf.square( tf.subtract( training_encoder_input_list[0][pathway_i], training_decoder_output_list[0][pathway_i], )))) cost['weight_decay'] = tf.constant(0.0) for layer_i in range(len(layers)): cost_encoder_weight_decay = layers[layer_i][ 'weight_decay'] * 0.5 * tf.reduce_mean( tf.square(encoder_weight[layer_i])) if layers[layer_i]['tied_weight']: cost['weight_decay'] = tf.add(cost['weight_decay'], cost_encoder_weight_decay) else: cost_decoder_weight_decay = layers[layer_i][ 'weight_decay'] * 0.5 * tf.reduce_mean( tf.square(decoder_weight[layer_i])) cost['weight_decay'] = tf.add( cost['weight_decay'], 0.5 * cost_encoder_weight_decay + 0.5 * cost_decoder_weight_decay) cost['exclusivity'] = tf.constant(0.0) for layer_i in range(len(layers)): for pathway_i, (encoder_pathway_output) in enumerate( training_encoder_output_list[layer_i]): exclusivity = np.setdiff1d( range(layers[layer_i]['n_channels']), layers[layer_i]['pathways'][pathway_i]).tolist() if exclusivity != [] and encoder_pathway_output != None: if layers[layer_i]['exclusive_type'] == 'pow4': if not 'gaussian_mean' in 
layers[ layer_i] or not 'gaussian_std' in layers[layer_i]: cost['exclusivity'] = tf.add( cost['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.pow( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity), 4), ))) else: cost['exclusivity'] = tf.add( cost['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.subtract( tf.pow( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity), 4), tf.pow( tf.random_normal( [len(exclusivity)], mean=layers[layer_i] ['gaussian_mean'], stddev=layers[layer_i] ['gaussian_std']), 4))))) elif layers[layer_i]['exclusive_type'] == 'exp': if not 'gaussian_mean' in layers[ layer_i] or not 'gaussian_std' in layers[layer_i]: cost['exclusivity'] = tf.add( cost['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( (-1 / layers[layer_i]['exclusive_scale']) * tf.exp(-0.5 * layers[layer_i]['exclusive_scale'] * tf.square( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), ))) else: cost['exclusivity'] = tf.add( cost['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.subtract( (-1 / layers[layer_i]['exclusive_scale']) * tf.exp(-0.5 * layers[layer_i][ 'exclusive_scale'] * tf.square( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), (-1 / layers[layer_i]['exclusive_scale']) * tf.exp(-0.5 * layers[layer_i] ['exclusive_scale'] * tf.square( tf.random_normal( [len(exclusivity)], mean=layers[layer_i] ['gaussian_mean'], stddev=layers[layer_i] ['gaussian_std']))), )))) elif layers[layer_i]['exclusive_type'] == 'logcosh': if not 'gaussian_mean' in layers[ layer_i] or not 'gaussian_std' in layers[layer_i]: cost['exclusivity'] = tf.add( cost['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( (1 / layers[layer_i]['exclusive_scale']) * tf.log( tf.cosh( layers[layer_i]['exclusive_scale'] * tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), ))) else: cost['exclusivity'] = tf.add( cost['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.subtract( (1 / layers[layer_i]['exclusive_scale']) * tf.log( tf.cosh(layers[layer_i][ 'exclusive_scale'] * tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), (1 / layers[layer_i]['exclusive_scale']) * tf.log( tf.cosh(layers[layer_i] ['exclusive_scale'] * tf.random_normal( [len(exclusivity)], mean=layers[layer_i] ['gaussian_mean'], stddev=layers[layer_i] ['gaussian_std']))))))) cost['sparsity'] = tf.constant(0.0) for layer_i in range(len(layers)): for pathway_i, (encoder_pathway_output) in enumerate( training_encoder_output_list[layer_i]): if layers[layer_i]['pathways'][ pathway_i] != None and encoder_pathway_output != None: cost['sparsity'] = tf.add( cost['sparsity'], layers[layer_i]['sparse_regularizer'] * tf.reduce_mean( kl_divergence( layers[layer_i]['sparsity_level'], tf.gather( tf.reduce_mean(encoder_pathway_output, [0, 1, 2]), layers[layer_i]['pathways'][pathway_i])))) cost['total'] = cost['reconstruction_error'] + cost['weight_decay'] + cost[ 'sparsity'] + cost['exclusivity'] layerwise_cost = [] for layer_i in range(len(layers)): layerwise_cost.append({}) layerwise_cost[layer_i]['reconstruction_error'] = tf.constant(0.0) for pathway_i in range(len(layers[layer_i]['pathways'])): if training_encoder_input_list[layer_i][ pathway_i] != None and 
layerwise_training_decoder_output_list[ layer_i][pathway_i] != None: layerwise_cost[layer_i]['reconstruction_error'] = tf.add( layerwise_cost[layer_i]['reconstruction_error'], layers[layer_i]['reconstructive_regularizer'] * 0.5 * tf.reduce_mean( tf.square( tf.subtract( training_encoder_input_list[layer_i] [pathway_i], layerwise_training_decoder_output_list[layer_i] [pathway_i], )))) layerwise_cost[layer_i]['weight_decay'] = tf.constant(0.0) layerwise_cost_encoder_weight_decay = layers[layer_i][ 'weight_decay'] * 0.5 * tf.reduce_mean( tf.square(encoder_weight[layer_i])) if layers[layer_i]['tied_weight']: layerwise_cost[layer_i]['weight_decay'] = tf.add( layerwise_cost[layer_i]['weight_decay'], layerwise_cost_encoder_weight_decay) else: layerwise_cost_decoder_weight_decay = layers[layer_i][ 'weight_decay'] * 0.5 * tf.reduce_mean( tf.square(decoder_weight[layer_i])) layerwise_cost[layer_i]['weight_decay'] = tf.add( layerwise_cost[layer_i]['weight_decay'], 0.5 * layerwise_cost_encoder_weight_decay + 0.5 * layerwise_cost_decoder_weight_decay) layerwise_cost[layer_i]['exclusivity'] = tf.constant(0.0) for pathway_i, (encoder_pathway_output) in enumerate( training_encoder_output_list[layer_i]): exclusivity = np.setdiff1d( range(layers[layer_i]['n_channels']), layers[layer_i]['pathways'][pathway_i]).tolist() if exclusivity != [] and encoder_pathway_output != None: if layers[layer_i]['exclusive_type'] == 'pow4': if not 'gaussian_mean' in layers[ layer_i] or not 'gaussian_std' in layers[layer_i]: layerwise_cost[layer_i]['exclusivity'] = tf.add( layerwise_cost[layer_i]['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.pow( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity), 4), ))) else: layerwise_cost[layer_i]['exclusivity'] = tf.add( layerwise_cost[layer_i]['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.subtract( tf.pow( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity), 4), tf.pow( tf.random_normal( [len(exclusivity)], mean=layers[layer_i] ['gaussian_mean'], stddev=layers[layer_i] ['gaussian_std']), 4))))) elif layers[layer_i]['exclusive_type'] == 'exp': if not 'gaussian_mean' in layers[ layer_i] or not 'gaussian_std' in layers[layer_i]: layerwise_cost[layer_i]['exclusivity'] = tf.add( layerwise_cost[layer_i]['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( (-1 / layers[layer_i]['exclusive_scale']) * tf.exp(-0.5 * layers[layer_i]['exclusive_scale'] * tf.square( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), ))) else: layerwise_cost[layer_i]['exclusivity'] = tf.add( layerwise_cost[layer_i]['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.subtract( (-1 / layers[layer_i]['exclusive_scale']) * tf.exp(-0.5 * layers[layer_i][ 'exclusive_scale'] * tf.square( tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), (-1 / layers[layer_i]['exclusive_scale']) * tf.exp(-0.5 * layers[layer_i] ['exclusive_scale'] * tf.square( tf.random_normal( [len(exclusivity)], mean=layers[layer_i] ['gaussian_mean'], stddev=layers[layer_i] ['gaussian_std']))))))) elif layers[layer_i]['exclusive_type'] == 'logcosh': if not 'gaussian_mean' in layers[ layer_i] or not 'gaussian_std' in layers[layer_i]: layerwise_cost[layer_i]['exclusivity'] = tf.add( layerwise_cost[layer_i]['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( 
tf.reduce_mean( (1 / layers[layer_i]['exclusive_scale']) * tf.log( tf.cosh( layers[layer_i]['exclusive_scale'] * tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), ))) else: layerwise_cost[layer_i]['exclusivity'] = tf.add( layerwise_cost[layer_i]['exclusivity'], layers[layer_i]['exclusive_regularizer'] * 0.5 * tf.square( tf.reduce_mean( tf.subtract( (1 / layers[layer_i]['exclusive_scale']) * tf.log( tf.cosh(layers[layer_i][ 'exclusive_scale'] * tf.gather( tf.reduce_mean( encoder_pathway_output, [0, 1, 2]), exclusivity))), (1 / layers[layer_i]['exclusive_scale']) * tf.log( tf.cosh(layers[layer_i] ['exclusive_scale'] * tf.random_normal( [len(exclusivity)], mean=layers[layer_i] ['gaussian_mean'], stddev=layers[layer_i] ['gaussian_std']))))))) layerwise_cost[layer_i]['sparsity'] = tf.constant(0.0) for pathway_i, (encoder_pathway_output) in enumerate( training_encoder_output_list[layer_i]): if layers[layer_i]['pathways'][ pathway_i] != None and encoder_pathway_output != None: layerwise_cost[layer_i]['sparsity'] = tf.add( layerwise_cost[layer_i]['sparsity'], layers[layer_i]['sparse_regularizer'] * tf.reduce_mean( kl_divergence( layers[layer_i]['sparsity_level'], tf.gather( tf.reduce_mean(encoder_pathway_output, [0, 1, 2]), layers[layer_i]['pathways'][pathway_i])))) layerwise_cost[layer_i]['total'] = layerwise_cost[layer_i][ 'reconstruction_error'] + layerwise_cost[layer_i][ 'weight_decay'] + layerwise_cost[layer_i][ 'exclusivity'] + layerwise_cost[layer_i]['sparsity'] # %% return { 'training_x': training_x, 'training_z': training_z, 'training_y': training_y, 'x': x, 'y': y, 'z': z, 'layerwise_y': layerwise_y, 'layerwise_z': layerwise_z, 'cost': cost, 'layerwise_cost': layerwise_cost, 'encoder_weight': encoder_weight, 'decoder_weight': decoder_weight, 'encoder_bias': encoder_bias, 'decoder_bias': decoder_bias, }
def func(x): return tf.cosh(x) + tf.slice(x, [1, 1], [1, 1])