def fit(self, X, y=None):
    """Train a freshly built VAE on the normal samples selected by ``X``.

    ``X`` is used as an index array into ``self.normal_datas``: the selected
    rows form the training set, while the remaining normal rows together
    with all of ``self.abnormal_datas`` form the validation set (label 0
    for normal, 1 for abnormal).

    The score returned by ``train_vae.train`` is stored in
    ``self.best_test_score`` (printed below as "Result (F1)").

    Args:
        X: index array selecting training rows of ``self.normal_datas``.
        y: unused apart from logging its shape.

    Returns:
        ``self``, following the scikit-learn estimator convention.
    """
    print("fit X: ", X.shape)
    print("fit y: ", y.shape if y is not None else '')

    # Split train and validation set.
    train_datas = self.normal_datas[X]
    held_out_normals = np.delete(self.normal_datas, X, axis=0)
    valid_datas = np.vstack([held_out_normals, self.abnormal_datas])
    valid_labels = np.hstack([
        np.zeros(self.normal_datas.shape[0] - X.shape[0]),
        np.ones(self.abnormal_datas.shape[0]),
    ])

    # Rebuild the VAE from scratch so nothing leaks between fits.
    tf.reset_default_graph()
    vae = VAE(**self.vae_params)
    vae.build()

    # Normalization: the scaler is fitted on the training rows only and
    # then applied to both splits.
    vae.build_normalize(train_data=train_datas)
    norm_train_datas = vae.transform_raw_data(raw_data=train_datas)
    norm_valid_datas = vae.transform_raw_data(raw_data=valid_datas)

    # Mini batches; slicing already clamps at the end of the data, so the
    # last batch may simply be shorter.
    # NOTE(review): batch_size is not defined in this method; presumably a
    # module-level setting - confirm.
    mini_batchs = [
        norm_train_datas[start:start + batch_size]
        for start in range(0, len(norm_train_datas), batch_size)
    ]

    # Train.
    self.best_test_score, _ = train_vae.train(
        vae=vae,
        mini_batchs=mini_batchs,
        valida_data=norm_valid_datas,
        valida_label=valid_labels,
        save_out_model=None,
        n_epoch=30,
    )

    # Report result.
    print("Perform training with the below parameters: ")
    print("------------------------------------------- ")
    print(json.dumps(self.vae_params, indent=2))
    print("------------------------------------------- ")
    print("Result (F1): ", self.best_test_score)
    return self
size=np.floor(.75 * num_samp).astype('int'), replace=False) te_idx = np.setdiff1d(np.arange(num_samp), tr_idx) x_train, x_test = X[tr_idx], X[te_idx] # define VAE half_depth = 3 latent_dim = elliptic_latent.pde.V.dim() repatr_out = False # activation='linear' activation = tf.keras.layers.LeakyReLU(alpha=0.01) optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, amsgrad=True) vae = VAE(x_train.shape[1], half_depth=half_depth, latent_dim=latent_dim, repatr_out=repatr_out, activation=activation, optimizer=optimizer, beta=100) # nll = lambda x: [-elliptic_latent.get_geom(elliptic_latent.prior.gen_vector(x_i.numpy().flatten()))[0] for x_i in x] # # nll = lambda x: tf.map_fn(lambda x_i:-elliptic_latent.get_geom(elliptic_latent.prior.gen_vector(x_i.numpy().flatten()))[0], x) # nll = lambda x,y: [(elliptic_latent.get_geom(elliptic_latent.prior.gen_vector(x[i].numpy().flatten()))[0] # -elliptic.get_geom(elliptic.prior.gen_vector(y[i].numpy().flatten()))[0])**2 for i in range(x.shape[0])] # vae=VAE(x_train.shape[1], half_depth=half_depth, latent_dim=latent_dim, repatr_out=repatr_out, # activation=activation, optimizer=optimizer, custom_loss=nll, run_eagerly=True) # folder=folder+'/saved_model' f_name = [ 'vae_' + i + '_' + algs[alg_no] + str(ensbl_sz) for i in ('fullmodel', 'encoder', 'decoder') ] try:
def main():
    """Run a DREAM sampler on the elliptic inverse problem.

    Steps, in order: parse command-line options; build the elliptic inverse
    problem and a coarser latent counterpart; load (or train and save) an
    emulator and an autoencoder from ``./analysis_f_SNR<SNR>``; run the
    DREAM sampler; finally rename the sampler's output pickle and append
    run metadata to it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('algNO', nargs='?', type=int, default=0)
    parser.add_argument('emuNO', nargs='?', type=int, default=1)
    parser.add_argument('aeNO', nargs='?', type=int, default=0)
    parser.add_argument('num_samp', nargs='?', type=int, default=5000)
    parser.add_argument('num_burnin', nargs='?', type=int, default=1000)
    # AE [.1,1.,.6] # CAE [.1,.6,.3] # VAE [.3]
    parser.add_argument('step_sizes', nargs='?', type=float,
                        default=[.1, 1., .6, None, None])
    parser.add_argument('step_nums', nargs='?', type=int,
                        default=[1, 1, 5, 1, 5])
    parser.add_argument('algs', nargs='?', type=str, default=[
        'DREAM' + a
        for a in ('pCN', 'infMALA', 'infHMC', 'infmMALA', 'infmHMC')
    ])
    parser.add_argument('emus', nargs='?', type=str, default=['dnn', 'cnn'])
    parser.add_argument('aes', nargs='?', type=str,
                        default=['ae', 'cae', 'vae'])
    args = parser.parse_args()
    emu_name = args.emus[args.emuNO]
    ae_name = args.aes[args.aeNO]

    ##------ define the inverse elliptic problem ------##
    # parameters for PDE model
    nx = 40
    ny = 40
    # parameters for prior model
    sigma = 1.25
    s = 0.0625
    # parameters for misfit model
    SNR = 50  # 100
    # define the inverse problem
    elliptic = Elliptic(nx=nx, ny=ny, SNR=SNR, sigma=sigma, s=s)
    # define the latent (coarser) inverse problem
    nx = 10
    ny = 10
    obs, nzsd, loc = [
        getattr(elliptic.misfit, i) for i in ('obs', 'nzsd', 'loc')
    ]
    elliptic_latent = Elliptic(nx=nx, ny=ny, SNR=SNR,
                               obs=obs, nzsd=nzsd, loc=loc)

    ##------ define networks ------##
    # training data algorithms
    algs = ['EKI', 'EKS']
    alg_no = 1
    # load data
    ensbl_sz = 500
    folder = './analysis_f_SNR' + str(SNR)
    os.makedirs(folder, exist_ok=True)
    # common prefix of all training-data files
    data_prefix = os.path.join(folder,
                               algs[alg_no] + '_ensbl' + str(ensbl_sz))

    ##---- EMULATOR ----##
    # prepare for training data
    if emu_name == 'dnn':
        with np.load(file=data_prefix + '_training_XY.npz') as loaded:
            X = loaded['X']
            Y = loaded['Y']
    elif emu_name == 'cnn':
        with np.load(file=data_prefix + '_training_XimgY.npz') as loaded:
            X = loaded['X']
            Y = loaded['Y']
        X = X[:, :, :, None]
    num_samp = X.shape[0]
    # random 75% / 25% train / test split
    tr_idx = np.random.choice(num_samp,
                              size=np.floor(.75 * num_samp).astype('int'),
                              replace=False)
    te_idx = np.setdiff1d(np.arange(num_samp), tr_idx)
    x_train, x_test = X[tr_idx], X[te_idx]
    y_train, y_test = Y[tr_idx], Y[te_idx]

    # define emulator
    if emu_name == 'dnn':
        depth = 3
        activations = {'hidden': 'softplus', 'output': 'linear'}
        droprate = .4
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        emulator = DNN(x_train.shape[1], y_train.shape[1], depth=depth,
                       droprate=droprate, activations=activations,
                       optimizer=optimizer)
    elif emu_name == 'cnn':
        num_filters = [16, 8, 8]
        activations = {
            'conv': 'softplus',
            'latent': 'softmax',
            'output': 'linear'
        }
        latent_dim = 256
        droprate = .5
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        emulator = CNN(x_train.shape[1:], y_train.shape[1],
                       num_filters=num_filters, latent_dim=latent_dim,
                       droprate=droprate, activations=activations,
                       optimizer=optimizer)
    f_name = emu_name + '_' + algs[alg_no] + str(ensbl_sz)
    emulator_path = os.path.join(folder, f_name + '.h5')

    # load emulator: full model first, then weights only, else train.
    # `except Exception` (not bare `except`) so that Ctrl-C / SystemExit
    # abort the script instead of silently triggering a training run.
    try:
        emulator.model = load_model(emulator_path,
                                    custom_objects={'loss': None})
        print(f_name + ' has been loaded!')
    except Exception:
        try:
            emulator.model.load_weights(emulator_path)
            print(f_name + ' has been loaded!')
        except Exception:
            print('\nNo emulator found. Training {}...\n'.format(emu_name))
            epochs = 200
            patience = 0
            emulator.train(x_train, y_train, x_test=x_test, y_test=y_test,
                           epochs=epochs, batch_size=64, verbose=1,
                           patience=patience)
            # save emulator (fall back to weights only if that fails)
            try:
                emulator.model.save(emulator_path)
            except Exception:
                emulator.model.save_weights(emulator_path)

    ##---- AUTOENCODER ----##
    # prepare for training data
    if 'c' in ae_name:
        with np.load(file=data_prefix + '_training_XimgY.npz') as loaded:
            X = loaded['X']
        X = X[:, :-1, :-1, None]
    else:
        with np.load(file=data_prefix + '_training_X.npz') as loaded:
            X = loaded['X']
    num_samp = X.shape[0]
    tr_idx = np.random.choice(num_samp,
                              size=np.floor(.75 * num_samp).astype('int'),
                              replace=False)
    te_idx = np.setdiff1d(np.arange(num_samp), tr_idx)
    x_train, x_test = X[tr_idx], X[te_idx]

    # define autoencoder
    if ae_name == 'ae':
        half_depth = 3
        latent_dim = elliptic_latent.pde.V.dim()
        droprate = 0.
        # activation='linear'
        activation = tf.keras.layers.LeakyReLU(alpha=2.00)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        autoencoder = AutoEncoder(x_train.shape[1], half_depth=half_depth,
                                  latent_dim=latent_dim, droprate=droprate,
                                  activation=activation,
                                  optimizer=optimizer)
    elif ae_name == 'cae':
        num_filters = [16, 8]
        latent_dim = elliptic_latent.prior.dim
        activations = {'conv': 'elu', 'latent': 'linear'}
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        autoencoder = ConvAutoEncoder(x_train.shape[1:],
                                      num_filters=num_filters,
                                      latent_dim=latent_dim,
                                      activations=activations,
                                      optimizer=optimizer)
    elif ae_name == 'vae':
        half_depth = 5
        latent_dim = elliptic_latent.pde.V.dim()
        repatr_out = False
        beta = 1.
        activation = 'elu'
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        autoencoder = VAE(x_train.shape[1], half_depth=half_depth,
                          latent_dim=latent_dim, repatr_out=repatr_out,
                          activation=activation, optimizer=optimizer,
                          beta=beta)
    f_name = [
        ae_name + '_' + i + '_' + algs[alg_no] + str(ensbl_sz)
        for i in ('fullmodel', 'encoder', 'decoder')
    ]
    ae_paths = [os.path.join(folder, name + '.h5') for name in f_name]

    # load autoencoder (all three parts), otherwise train and save it
    try:
        autoencoder.model = load_model(ae_paths[0],
                                       custom_objects={'loss': None})
        print(f_name[0] + ' has been loaded!')
        autoencoder.encoder = load_model(ae_paths[1],
                                         custom_objects={'loss': None})
        print(f_name[1] + ' has been loaded!')
        autoencoder.decoder = load_model(ae_paths[2],
                                         custom_objects={'loss': None})
        print(f_name[2] + ' has been loaded!')
    except Exception:
        print('\nNo autoencoder found. Training {}...\n'.format(ae_name))
        epochs = 200
        patience = 0
        noise = 0.2
        kwargs = {'patience': patience}
        # the noise option is only passed to the plain autoencoder
        if ae_name == 'ae' and noise:
            kwargs['noise'] = noise
        autoencoder.train(x_train, x_test=x_test, epochs=epochs,
                          batch_size=64, verbose=1, **kwargs)
        # save autoencoder
        autoencoder.model.save(ae_paths[0])
        autoencoder.encoder.save(ae_paths[1])
        autoencoder.decoder.save(ae_paths[2])

    ##------ define MCMC ------##
    # initialization
    unknown = elliptic_latent.prior.gen_vector()

    # run MCMC to generate samples
    # NOTE(review): the default step_sizes hold None for the last two
    # algorithms, and '%g' raises TypeError on None - confirm those
    # algorithms are intentionally unsupported with the defaults.
    print("Preparing %s sampler with step size %g for %d step(s)..." %
          (args.algs[args.algNO], args.step_sizes[args.algNO],
           args.step_nums[args.algNO]))

    emul_geom = lambda q, geom_ord=[0], whitened=False, **kwargs: \
        geom_emul.geom(q, elliptic, emulator, geom_ord, whitened, **kwargs)
    latent_geom = lambda q, geom_ord=[0], whitened=False, **kwargs: geom(
        q, elliptic_latent.pde.V, elliptic.pde.V, autoencoder, geom_ord,
        whitened, emul_geom=emul_geom, bip_lat=elliptic_latent,
        bip=elliptic, **kwargs)
    # for manifold algorithms additionally pass:
    #   AE=autoencoder, k=5, bip_lat=elliptic_latent
    dream = DREAM(unknown, elliptic_latent, latent_geom,
                  args.step_sizes[args.algNO], args.step_nums[args.algNO],
                  args.algs[args.algNO], whitened=False, log_wts=False)
    mc_fun = dream.sample
    mc_args = (args.num_samp, args.num_burnin)
    mc_fun(*mc_args)

    # append PDE information including the count of solving
    filename_ = os.path.join(dream.savepath, dream.filename + '.pckl')
    filename = os.path.join(
        dream.savepath,
        'Elliptic_' + dream.filename + '_' + emu_name + '_' + ae_name +
        '.pckl')
    # change filename
    os.rename(filename_, filename)
    soln_count = elliptic.pde.soln_count
    # nx, ny hold the latent-mesh values (10) assigned above.
    # The context manager closes the file even if pickling fails.
    with open(filename, 'ab') as f:
        pickle.dump([nx, ny, sigma, s, SNR, soln_count, args], f)
autoencoder=AutoEncoder(x_train.shape[1], half_depth=half_depth, latent_dim=latent_dim, droprate=droprate, activation=activation, optimizer=optimizer) elif AE=='cae': num_filters=[16,8]; latent_dim=elliptic_latent.prior.dim # activations={'conv':tf.keras.layers.LeakyReLU(alpha=0.1),'latent':None} # [16,1] activations={'conv':'elu','latent':'linear'} optimizer=tf.keras.optimizers.Adam(learning_rate=0.001) autoencoder=ConvAutoEncoder(x_train.shape[1:], num_filters=num_filters, latent_dim=latent_dim, activations=activations, optimizer=optimizer) elif AE=='vae': half_depth=5; latent_dim=elliptic_latent.pde.V.dim() repatr_out=False; beta=1. activation='elu' # activation=tf.keras.layers.LeakyReLU(alpha=0.01) optimizer=tf.keras.optimizers.Adam(learning_rate=0.001,amsgrad=True) autoencoder=VAE(x_train.shape[1], half_depth=half_depth, latent_dim=latent_dim, repatr_out=repatr_out, activation=activation, optimizer=optimizer, beta=beta) f_name=[AE+'_'+i+'_'+algs[alg_no]+str(ensbl_sz) for i in ('fullmodel','encoder','decoder')] # load autoencoder try: autoencoder.model=load_model(os.path.join(folder,f_name[0]+'.h5'),custom_objects={'loss':None}) print(f_name[0]+' has been loaded!') autoencoder.encoder=load_model(os.path.join(folder,f_name[1]+'.h5'),custom_objects={'loss':None}) print(f_name[1]+' has been loaded!') autoencoder.decoder=load_model(os.path.join(folder,f_name[2]+'.h5'),custom_objects={'loss':None}) print(f_name[2]+' has been loaded!') except: print('\nNo autoencoder found. Training {}...\n'.format(AE)) epochs=200 patience=0 noise=0.2 kwargs={'patience':patience}
Apply RandomSearchCV to search for best parameters """ from sklearn.model_selection import RandomizedSearchCV rs = RandomizedSearchCV(estimator=vae_hyper, param_distributions=params, n_jobs=1, verbose=2, n_iter=10, cv=5, return_train_score=False, refit=False) rs.fit(X=np.arange(normal_datas.shape[0]),y=None) print ("Best parameters found: ") print (json.dumps(rs.best_params_, indent=2)) """ Create new VAE model for whole dataset with best parameter found """ tf.reset_default_graph() vae = VAE(**rs.best_params_) # save class instance by using cPickle, main purpose is to save parameters too. cPickle.dump(vae, open(os.path.join(save_vae_hyper_folder, 'vae_class.pkl'), 'wb')) vae.build() """ Prepare data """ datas = np.vstack([normal_datas, bearing_datas, gear_datas]) labels = np.hstack([np.zeros(normal_datas.shape[0]), # 0 for inlier, 1 for outlier np.ones(bearing_datas.shape[0]), np.ones(gear_datas.shape[0])]) train_datas, test_datas, train_labels, test_labels = utils.split_train_test(datas=datas, labels=labels, frac=0.8)
def main():
    """Run a DREAM sampler on the linear-Gaussian inverse problem.

    Steps, in order: parse command-line options; load the problem data from
    ``./result/lin.pickle`` or generate new data; define a 2-dimensional
    latent problem; load (or train and save) an emulator and an autoencoder
    from ``./train_NN``; run the DREAM sampler; finally rename the
    sampler's output pickle and append run metadata to it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('algNO', nargs='?', type=int, default=0)
    parser.add_argument('emuNO', nargs='?', type=int, default=0)
    parser.add_argument('aeNO', nargs='?', type=int, default=0)
    parser.add_argument('num_samp', nargs='?', type=int, default=10000)
    parser.add_argument('num_burnin', nargs='?', type=int, default=10000)
    # AE [.01,.005,.01]
    parser.add_argument('step_sizes', nargs='?', type=float,
                        default=[.01, .005, .005, None, None])
    parser.add_argument('step_nums', nargs='?', type=int,
                        default=[1, 1, 5, 1, 5])
    parser.add_argument('algs', nargs='?', type=str, default=[
        'DREAM' + a
        for a in ('pCN', 'infMALA', 'infHMC', 'infmMALA', 'infmHMC')
    ])
    parser.add_argument('emus', nargs='?', type=str, default=['dnn', 'cnn'])
    parser.add_argument('aes', nargs='?', type=str,
                        default=['ae', 'cae', 'vae'])
    args = parser.parse_args()
    emu_name = args.emus[args.emuNO]
    ae_name = args.aes[args.aeNO]

    ##------ define the linear-Gaussian inverse problem ------##
    # set up
    d = 3
    m = 100
    # `except Exception` (not bare `except`) so that Ctrl-C / SystemExit
    # are not mistaken for "no data found".
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use result files you produced yourself.
    try:
        with open('./result/lin.pickle', 'rb') as f:
            [nz_var, pr_cov, A, true_input, y] = pickle.load(f)
        print('Data loaded!\n')
        kwargs = {'true_input': true_input, 'A': A, 'y': y}
    except Exception:
        print('No data found. Generate new data...\n')
        nz_var = .1
        pr_cov = 1.
        true_input = np.arange(-np.floor(d / 2), np.ceil(d / 2))
        A = np.random.rand(m, d)
        kwargs = {'true_input': true_input, 'A': A}
    lin = LiN(d, m, nz_var=nz_var, pr_cov=pr_cov, **kwargs)
    y = lin.y
    lin.prior = {
        'mean': np.zeros(lin.input_dim),
        'cov': np.diag(lin.pr_cov) if np.ndim(lin.pr_cov) == 1
        else lin.pr_cov,
        'sample': lin.sample
    }

    # set up latent
    latent_dim = 2

    class LiN_lat:
        """Minimal latent problem: only a dimension and a standard-normal
        sampler."""

        def __init__(self, input_dim):
            self.input_dim = input_dim

        def sample(self, num_samp=1):
            samp = np.random.randn(num_samp, self.input_dim)
            return np.squeeze(samp)

    lin_latent = LiN_lat(latent_dim)
    lin_latent.prior = {
        'mean': np.zeros(lin_latent.input_dim),
        'cov': np.eye(lin_latent.input_dim),
        'sample': lin_latent.sample
    }

    ##------ define networks ------##
    # training data algorithms
    algs = ['EKI', 'EKS']
    alg_no = 1
    # load data
    ensbl_sz = 100
    folder = './train_NN'
    # common prefix of all training-data files
    data_prefix = os.path.join(folder,
                               algs[alg_no] + '_ensbl' + str(ensbl_sz))

    ##---- EMULATOR ----##
    # prepare for training data
    if emu_name == 'dnn':
        with np.load(file=data_prefix + '_training_XY.npz') as loaded:
            X = loaded['X']
            Y = loaded['Y']
    elif emu_name == 'cnn':
        with np.load(file=data_prefix + '_training_XimgY.npz') as loaded:
            X = loaded['X']
            Y = loaded['Y']
        X = X[:, :, :, None]
    num_samp = X.shape[0]
    # random 75% / 25% train / test split
    tr_idx = np.random.choice(num_samp,
                              size=np.floor(.75 * num_samp).astype('int'),
                              replace=False)
    te_idx = np.setdiff1d(np.arange(num_samp), tr_idx)
    x_train, x_test = X[tr_idx], X[te_idx]
    y_train, y_test = Y[tr_idx], Y[te_idx]

    # define emulator
    if emu_name == 'dnn':
        depth = 3
        activations = {'hidden': 'softplus', 'output': 'linear'}
        droprate = 0.
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        emulator = DNN(x_train.shape[1], y_train.shape[1], depth=depth,
                       droprate=droprate, activations=activations,
                       optimizer=optimizer)
    elif emu_name == 'cnn':
        num_filters = [16, 8, 8]
        activations = {
            'conv': 'softplus',
            'latent': 'softmax',
            'output': 'linear'
        }
        latent_dim = 256
        droprate = .5
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        emulator = CNN(x_train.shape[1:], y_train.shape[1],
                       num_filters=num_filters, latent_dim=latent_dim,
                       droprate=droprate, activations=activations,
                       optimizer=optimizer)
    f_name = emu_name + '_' + algs[alg_no] + str(ensbl_sz)
    emulator_path = os.path.join(folder, f_name + '.h5')

    # load emulator: full model first, then weights only, else train
    try:
        emulator.model = load_model(emulator_path,
                                    custom_objects={'loss': None})
        print(f_name + ' has been loaded!')
    except Exception:
        try:
            emulator.model.load_weights(emulator_path)
            print(f_name + ' has been loaded!')
        except Exception:
            print('\nNo emulator found. Training {}...\n'.format(emu_name))
            epochs = 1000
            patience = 10
            emulator.train(x_train, y_train, x_test=x_test, y_test=y_test,
                           epochs=epochs, batch_size=64, verbose=1,
                           patience=patience)
            # save emulator (fall back to weights only if that fails)
            try:
                emulator.model.save(emulator_path)
            except Exception:
                emulator.model.save_weights(emulator_path)

    ##---- AUTOENCODER ----##
    # prepare for training data
    if 'c' in ae_name:
        with np.load(file=data_prefix + '_training_XimgY.npz') as loaded:
            X = loaded['X']
        X = X[:, :-1, :-1, None]
    else:
        with np.load(file=data_prefix + '_training_X.npz') as loaded:
            X = loaded['X']
    num_samp = X.shape[0]
    tr_idx = np.random.choice(num_samp,
                              size=np.floor(.75 * num_samp).astype('int'),
                              replace=False)
    te_idx = np.setdiff1d(np.arange(num_samp), tr_idx)
    x_train, x_test = X[tr_idx], X[te_idx]

    # define autoencoder
    if ae_name == 'ae':
        half_depth = 2
        latent_dim = 2
        droprate = 0.
        # activation='linear'
        activation = tf.keras.layers.LeakyReLU(alpha=2.)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        autoencoder = AutoEncoder(x_train.shape[1], half_depth=half_depth,
                                  latent_dim=latent_dim, droprate=droprate,
                                  activation=activation,
                                  optimizer=optimizer)
    elif ae_name == 'cae':
        num_filters = [16, 8]
        latent_dim = 2
        activations = {'conv': 'elu', 'latent': 'linear'}
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        autoencoder = ConvAutoEncoder(x_train.shape[1:],
                                      num_filters=num_filters,
                                      latent_dim=latent_dim,
                                      activations=activations,
                                      optimizer=optimizer)
    elif ae_name == 'vae':
        half_depth = 5
        latent_dim = 2
        repatr_out = False
        beta = 1.
        activation = 'elu'
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        autoencoder = VAE(x_train.shape[1], half_depth=half_depth,
                          latent_dim=latent_dim, repatr_out=repatr_out,
                          activation=activation, optimizer=optimizer,
                          beta=beta)
    f_name = [
        ae_name + '_' + i + '_' + algs[alg_no] + str(ensbl_sz)
        for i in ('fullmodel', 'encoder', 'decoder')
    ]
    ae_paths = [os.path.join(folder, name + '.h5') for name in f_name]

    # load autoencoder (all three parts), otherwise train and save it
    try:
        autoencoder.model = load_model(ae_paths[0],
                                       custom_objects={'loss': None})
        print(f_name[0] + ' has been loaded!')
        autoencoder.encoder = load_model(ae_paths[1],
                                         custom_objects={'loss': None})
        print(f_name[1] + ' has been loaded!')
        autoencoder.decoder = load_model(ae_paths[2],
                                         custom_objects={'loss': None})
        print(f_name[2] + ' has been loaded!')
    except Exception:
        print('\nNo autoencoder found. Training {}...\n'.format(ae_name))
        epochs = 1000
        patience = 10
        noise = 0.
        kwargs = {'patience': patience}
        # the noise option is only passed to the plain autoencoder
        if ae_name == 'ae' and noise:
            kwargs['noise'] = noise
        autoencoder.train(x_train, x_test=x_test, epochs=epochs,
                          batch_size=64, verbose=1, **kwargs)
        # save autoencoder
        autoencoder.model.save(ae_paths[0])
        autoencoder.encoder.save(ae_paths[1])
        autoencoder.decoder.save(ae_paths[2])

    ##------ define MCMC ------##
    # initialization
    u0 = lin_latent.prior['sample']()
    emul_geom = lambda q, geom_ord=[0], whitened=False, **kwargs: \
        geom_emul.geom(q, lin, emulator, geom_ord, whitened, **kwargs)
    latent_geom = lambda q, geom_ord=[0], whitened=False, **kwargs: geom(
        q, autoencoder, geom_ord, whitened, emul_geom=emul_geom, **kwargs)

    # run MCMC to generate samples
    # NOTE(review): the default step_sizes hold None for the last two
    # algorithms, and '%g' raises TypeError on None - confirm those
    # algorithms are intentionally unsupported with the defaults.
    print("Preparing %s sampler with step size %g for %d step(s)..." %
          (args.algs[args.algNO], args.step_sizes[args.algNO],
           args.step_nums[args.algNO]))

    # for manifold algorithms additionally pass: k=5, bip_lat=lin_latent
    dream = DREAM(u0, lin_latent, latent_geom,
                  args.step_sizes[args.algNO], args.step_nums[args.algNO],
                  args.algs[args.algNO], whitened=False, vol_wts='adjust',
                  AE=autoencoder)
    mc_fun = dream.sample
    mc_args = (args.num_samp, args.num_burnin)
    mc_fun(*mc_args)

    # append problem information to the sampler output
    filename_ = os.path.join(dream.savepath, dream.filename + '.pckl')
    filename = os.path.join(
        dream.savepath,
        'lin_' + dream.filename + '_' + emu_name + '_' + ae_name + '.pckl')
    # change filename
    os.rename(filename_, filename)
    # The context manager closes the file even if pickling fails.
    with open(filename, 'ab') as f:
        pickle.dump([nz_var, pr_cov, A, true_input, y, args], f)
VAE model """ input_dim = datas.shape[1] enc_hid_dim = 200 n_enc_layer = 2 latent_dim = 100 dec_hid_dim = 200 n_dec_layer = 2 init_lr = 0.001 n_sample = 2 # not used yet beta = 0.5 use_batch_norm = False init_keep_prob = 0.8 vae = VAE(input_dim=input_dim, enc_hid_dim=enc_hid_dim, n_enc_layer=n_enc_layer, latent_dim=latent_dim, dec_hid_dim=dec_hid_dim, n_dec_layer=n_dec_layer, init_lr=init_lr, n_sample=n_sample, beta=beta, use_batch_norm=use_batch_norm,init_keep_prob=init_keep_prob) # save class instance by using cPickle, main purpose is to save parameters too. cPickle.dump(vae,open(os.path.join(save_out_folder,'vae_class.pkl'),'wb')) vae.build() """ Mini-batchs & perform MinMaxScaler """ vae.build_normalize(train_data=train_datas) #1 norm_datas = vae.transform_raw_data(raw_data=train_datas) valida_norm_datas = vae.transform_raw_data(raw_data=valida_datas) mini_batchs = [norm_datas[i:min(i + batch_size, len(norm_datas))] for i in
def main():
    """Run a DREAM sampler on the advection-diffusion inverse problem.

    Steps, in order: parse command-line options; build the
    advection-diffusion problem on a (61, 61) mesh and a latent one on a
    (21, 21) mesh; load (or train and save) an emulator and an autoencoder
    from ``./train_NN_eldeg<eldeg>``; run the DREAM sampler; finally rename
    the sampler's output pickle and append run metadata to it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('algNO', nargs='?', type=int, default=0)
    parser.add_argument('emuNO', nargs='?', type=int, default=1)
    parser.add_argument('aeNO', nargs='?', type=int, default=0)
    parser.add_argument('num_samp', nargs='?', type=int, default=5000)
    parser.add_argument('num_burnin', nargs='?', type=int, default=1000)
    # AE [1e-2,1e-2,1e-2]
    parser.add_argument('step_sizes', nargs='?', type=float,
                        default=[2e-2, 1e-1, 1e-1, None, None])
    parser.add_argument('step_nums', nargs='?', type=int,
                        default=[1, 1, 5, 1, 5])
    parser.add_argument('algs', nargs='?', type=str, default=[
        'DREAM' + a
        for a in ('pCN', 'infMALA', 'infHMC', 'infmMALA', 'infmHMC')
    ])
    parser.add_argument('emus', nargs='?', type=str, default=['dnn', 'cnn'])
    parser.add_argument('aes', nargs='?', type=str,
                        default=['ae', 'cae', 'vae'])
    args = parser.parse_args()
    emu_name = args.emus[args.emuNO]
    ae_name = args.aes[args.aeNO]

    ##------ define the inverse problem ------##
    ## define the Advection-Diffusion inverse problem ##
    # mesh = df.Mesh('ad_10k.xml')
    meshsz = (61, 61)
    eldeg = 1
    gamma = 2.
    delta = 10.
    rel_noise = .5
    nref = 1
    # NOTE(review): `seed` is not defined in this function; presumably a
    # module-level variable - confirm.
    adif = advdiff(mesh=meshsz, eldeg=eldeg, gamma=gamma, delta=delta,
                   rel_noise=rel_noise, nref=nref, seed=seed)
    adif.prior.V = adif.prior.Vh
    adif.misfit.obs = np.array(
        [dat.get_local() for dat in adif.misfit.d.data]).flatten()
    # set up latent
    meshsz_latent = (21, 21)
    adif_latent = advdiff(mesh=meshsz_latent, eldeg=eldeg, gamma=gamma,
                          delta=delta, rel_noise=rel_noise, nref=nref,
                          seed=seed)
    adif_latent.prior.V = adif_latent.prior.Vh

    ##------ define networks ------##
    # training data algorithms
    algs = ['EKI', 'EKS']
    alg_no = 1
    # load data
    ensbl_sz = 500
    folder = './train_NN_eldeg' + str(eldeg)
    # common prefix of all training-data files
    data_prefix = os.path.join(folder,
                               algs[alg_no] + '_ensbl' + str(ensbl_sz))

    ##---- EMULATOR ----##
    # prepare for training data
    if emu_name == 'dnn':
        with np.load(file=data_prefix + '_training_XY.npz') as loaded:
            X = loaded['X']
            Y = loaded['Y']
    elif emu_name == 'cnn':
        with np.load(file=data_prefix + '_training_XimgY.npz') as loaded:
            X = loaded['X']
            Y = loaded['Y']
        X = X[:, :, :, None]
    num_samp = X.shape[0]
    # random 75% / 25% train / test split
    tr_idx = np.random.choice(num_samp,
                              size=np.floor(.75 * num_samp).astype('int'),
                              replace=False)
    te_idx = np.setdiff1d(np.arange(num_samp), tr_idx)
    x_train, x_test = X[tr_idx], X[te_idx]
    y_train, y_test = Y[tr_idx], Y[te_idx]

    # define emulator
    if emu_name == 'dnn':
        depth = 5
        activations = {
            'hidden': tf.keras.layers.LeakyReLU(alpha=.01),
            'output': 'linear'
        }
        droprate = 0.25
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        emulator = DNN(x_train.shape[1], y_train.shape[1], depth=depth,
                       droprate=droprate, activations=activations,
                       optimizer=optimizer)
    elif emu_name == 'cnn':
        num_filters = [16, 8, 4]
        activations = {
            'conv': tf.keras.layers.LeakyReLU(alpha=0.2),
            'latent': tf.keras.layers.PReLU(),
            'output': 'linear'
        }
        latent_dim = 1024
        droprate = .5
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        emulator = CNN(x_train.shape[1:], y_train.shape[1],
                       num_filters=num_filters, latent_dim=latent_dim,
                       droprate=droprate, activations=activations,
                       optimizer=optimizer)
    f_name = emu_name + '_' + algs[alg_no] + str(ensbl_sz)
    emulator_path = os.path.join(folder, f_name + '.h5')

    # load emulator: full model first, then weights only, else train.
    # `except Exception` (not bare `except`) so that Ctrl-C / SystemExit
    # abort the script instead of silently triggering a training run.
    try:
        emulator.model = load_model(emulator_path,
                                    custom_objects={'loss': None})
        print(f_name + ' has been loaded!')
    except Exception:
        try:
            emulator.model.load_weights(emulator_path)
            print(f_name + ' has been loaded!')
        except Exception:
            print('\nNo emulator found. Training {}...\n'.format(emu_name))
            epochs = 200
            patience = 0
            emulator.train(x_train, y_train, x_test=x_test, y_test=y_test,
                           epochs=epochs, batch_size=64, verbose=1,
                           patience=patience)
            # save emulator (fall back to weights only if that fails)
            try:
                emulator.model.save(emulator_path)
            except Exception:
                emulator.model.save_weights(emulator_path)

    ##---- AUTOENCODER ----##
    # prepare for training data
    if 'c' in ae_name:
        with np.load(file=data_prefix + '_training_XimgY.npz') as loaded:
            X = loaded['X']
        X = X[:, :-1, :-1, None]
    else:
        with np.load(file=data_prefix + '_training_XY.npz') as loaded:
            X = loaded['X']
    num_samp = X.shape[0]
    tr_idx = np.random.choice(num_samp,
                              size=np.floor(.75 * num_samp).astype('int'),
                              replace=False)
    te_idx = np.setdiff1d(np.arange(num_samp), tr_idx)
    x_train, x_test = X[tr_idx], X[te_idx]

    # define autoencoder
    if ae_name == 'ae':
        half_depth = 3
        latent_dim = adif_latent.prior.V.dim()
        droprate = 0.
        activation = 'elu'
        # activation=tf.keras.layers.LeakyReLU(alpha=1.5)
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        autoencoder = AutoEncoder(x_train.shape[1], half_depth=half_depth,
                                  latent_dim=latent_dim, droprate=droprate,
                                  activation=activation,
                                  optimizer=optimizer)
    elif ae_name == 'cae':
        num_filters = [16, 8]
        latent_dim = adif_latent.prior.V.dim()
        activations = {'conv': 'elu', 'latent': 'linear'}
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        autoencoder = ConvAutoEncoder(x_train.shape[1:],
                                      num_filters=num_filters,
                                      latent_dim=latent_dim,
                                      activations=activations,
                                      optimizer=optimizer)
    elif ae_name == 'vae':
        half_depth = 5
        latent_dim = adif_latent.prior.V.dim()
        repatr_out = False
        beta = 1.
        activation = 'elu'
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                             amsgrad=True)
        autoencoder = VAE(x_train.shape[1], half_depth=half_depth,
                          latent_dim=latent_dim, repatr_out=repatr_out,
                          activation=activation, optimizer=optimizer,
                          beta=beta)
    f_name = [
        ae_name + '_' + i + '_' + algs[alg_no] + str(ensbl_sz)
        for i in ('fullmodel', 'encoder', 'decoder')
    ]
    ae_paths = [os.path.join(folder, name + '.h5') for name in f_name]

    # load autoencoder (all three parts), otherwise train and save it
    try:
        autoencoder.model = load_model(ae_paths[0],
                                       custom_objects={'loss': None})
        print(f_name[0] + ' has been loaded!')
        autoencoder.encoder = load_model(ae_paths[1],
                                         custom_objects={'loss': None})
        print(f_name[1] + ' has been loaded!')
        autoencoder.decoder = load_model(ae_paths[2],
                                         custom_objects={'loss': None})
        print(f_name[2] + ' has been loaded!')
    except Exception:
        print('\nNo autoencoder found. Training {}...\n'.format(ae_name))
        epochs = 200
        patience = 0
        noise = 0.
        kwargs = {'patience': patience}
        # the noise option is only passed to the plain autoencoder
        if ae_name == 'ae' and noise:
            kwargs['noise'] = noise
        autoencoder.train(x_train, x_test=x_test, epochs=epochs,
                          batch_size=64, verbose=1, **kwargs)
        # save autoencoder
        autoencoder.model.save(ae_paths[0])
        autoencoder.encoder.save(ae_paths[1])
        autoencoder.decoder.save(ae_paths[2])

    ##------ define MCMC ------##
    # initialization
    u0 = adif_latent.prior.sample(whiten=False)
    emul_geom = lambda q, geom_ord=[0], whitened=False, **kwargs: \
        geom_emul.geom(q, adif, emulator, geom_ord, whitened, **kwargs)
    latent_geom = lambda q, geom_ord=[0], whitened=False, **kwargs: geom(
        q, adif_latent, adif, autoencoder, geom_ord, whitened,
        emul_geom=emul_geom, **kwargs)

    # run MCMC to generate samples
    # NOTE(review): the default step_sizes hold None for the last two
    # algorithms, and '%g' raises TypeError on None - confirm those
    # algorithms are intentionally unsupported with the defaults.
    print("Preparing %s sampler with step size %g for %d step(s)..." %
          (args.algs[args.algNO], args.step_sizes[args.algNO],
           args.step_nums[args.algNO]))

    # for manifold algorithms additionally pass: AE=autoencoder, k=5
    dream = DREAM(u0, adif_latent, latent_geom,
                  args.step_sizes[args.algNO], args.step_nums[args.algNO],
                  args.algs[args.algNO], whitened=False, log_wts=False)
    mc_fun = dream.sample
    mc_args = (args.num_samp, args.num_burnin)
    mc_fun(*mc_args)

    # append PDE information including the count of solving
    filename_ = os.path.join(dream.savepath, dream.filename + '.pckl')
    filename = os.path.join(
        dream.savepath,
        'AdvDiff_' + dream.filename + '_' + emu_name + '_' + ae_name +
        '.pckl')
    # change filename
    os.rename(filename_, filename)
    soln_count = adif_latent.pde.soln_count
    # The context manager closes the file even if pickling fails.
    with open(filename, 'ab') as f:
        pickle.dump(
            [meshsz, meshsz_latent, rel_noise, nref, soln_count, args], f)