def do_predict(FLAGS, datasets):
    """Run a stacked denoising autoencoder over a prepared test set.

    :param FLAGS: tensorflow.app.flags.FLAGS holding the model hyper-parameters.
    :param datasets: ndarray with the prepared test data.
    :return: ndarray with the model predictions.
    """
    # Parse the per-layer autoencoder settings out of their flag strings.
    layers = utilities.flag_to_list(FLAGS.dae_layers, 'int')
    enc_funcs = utilities.flag_to_list(FLAGS.dae_enc_act_func, 'str')
    dec_funcs = utilities.flag_to_list(FLAGS.dae_dec_act_func, 'str')
    optimizers = utilities.flag_to_list(FLAGS.dae_opt, 'str')
    loss_funcs = utilities.flag_to_list(FLAGS.dae_loss_func, 'str')
    learning_rates = utilities.flag_to_list(FLAGS.dae_learning_rate, 'float')
    reg_coefs = utilities.flag_to_list(FLAGS.dae_regcoef, 'float')
    corruption_types = utilities.flag_to_list(FLAGS.dae_corr_type, 'str')
    corruption_fracs = utilities.flag_to_list(FLAGS.dae_corr_frac, 'float')
    epoch_counts = utilities.flag_to_list(FLAGS.dae_num_epochs, 'int')
    batch_sizes = utilities.flag_to_list(FLAGS.dae_batch_size, 'int')

    # Sanity-check the parsed parameters before building the network.
    assert all(0. <= frac <= 1. for frac in corruption_fracs)
    assert all(ctype in ['masking', 'salt_and_pepper', 'none']
               for ctype in corruption_types)
    assert FLAGS.dataset in ['mnist', 'cifar10', 'custom']
    assert len(layers) > 0
    assert all(fname in ['sigmoid', 'tanh'] for fname in enc_funcs)
    assert all(fname in ['sigmoid', 'tanh', 'none'] for fname in dec_funcs)

    utilities.random_seed_np_tf(FLAGS.seed)

    # Resolve activation-function names to the actual callables.
    enc_funcs = [utilities.str2actfunc(fname) for fname in enc_funcs]
    dec_funcs = [utilities.str2actfunc(fname) for fname in dec_funcs]
    finetune_act = utilities.str2actfunc(FLAGS.finetune_act_func)

    sdae = stacked_denoising_autoencoder.StackedDenoisingAutoencoder(
        do_pretrain=FLAGS.do_pretrain,
        name=FLAGS.name,
        layers=layers,
        finetune_loss_func=FLAGS.finetune_loss_func,
        finetune_learning_rate=FLAGS.finetune_learning_rate,
        finetune_num_epochs=FLAGS.finetune_num_epochs,
        finetune_opt=FLAGS.finetune_opt,
        finetune_batch_size=FLAGS.finetune_batch_size,
        finetune_dropout=FLAGS.finetune_dropout,
        enc_act_func=enc_funcs,
        dec_act_func=dec_funcs,
        corr_type=corruption_types,
        corr_frac=corruption_fracs,
        regcoef=reg_coefs,
        loss_func=loss_funcs,
        opt=optimizers,
        learning_rate=learning_rates,
        momentum=FLAGS.momentum,
        num_epochs=epoch_counts,
        batch_size=batch_sizes,
        finetune_act_func=finetune_act)

    # No pre-training here: prediction only needs the already-trained model.
    return sdae.predict(datasets)
def do_train(internal_FLAGS, trX, vlX, trY, vlY):
    """Build and train a stacked denoising autoencoder.

    Performs optional layer-wise unsupervised pre-training followed by
    supervised fine-tuning. The trained state lives inside the SDAE object.

    :param internal_FLAGS: tensorflow.app.flags.FLAGS holding the model
        hyper-parameters.
    :param trX: training inputs.
    :param vlX: validation inputs.
    :param trY: training labels (converted to ndarray before fine-tuning).
    :param vlY: validation labels (converted to ndarray before fine-tuning).
    :return: None.
    """
    # Convert the autoencoder layer parameters from flag strings to typed lists.
    dae_layers = utilities.flag_to_list(internal_FLAGS.dae_layers, 'int')
    dae_enc_act_func = utilities.flag_to_list(internal_FLAGS.dae_enc_act_func, 'str')
    dae_dec_act_func = utilities.flag_to_list(internal_FLAGS.dae_dec_act_func, 'str')
    dae_opt = utilities.flag_to_list(internal_FLAGS.dae_opt, 'str')
    dae_loss_func = utilities.flag_to_list(internal_FLAGS.dae_loss_func, 'str')
    dae_learning_rate = utilities.flag_to_list(internal_FLAGS.dae_learning_rate, 'float')
    dae_regcoef = utilities.flag_to_list(internal_FLAGS.dae_regcoef, 'float')
    dae_corr_type = utilities.flag_to_list(internal_FLAGS.dae_corr_type, 'str')
    dae_corr_frac = utilities.flag_to_list(internal_FLAGS.dae_corr_frac, 'float')
    dae_num_epochs = utilities.flag_to_list(internal_FLAGS.dae_num_epochs, 'int')
    dae_batch_size = utilities.flag_to_list(internal_FLAGS.dae_batch_size, 'int')

    # Validate the parameters (AssertionError on bad flags, matching the
    # other entry points in this file).
    assert all([0. <= cf <= 1. for cf in dae_corr_frac])
    assert all([ct in ['masking', 'salt_and_pepper', 'none'] for ct in dae_corr_type])
    assert internal_FLAGS.dataset in ['mnist', 'cifar10', 'custom']
    assert len(dae_layers) > 0
    assert all([af in ['sigmoid', 'tanh'] for af in dae_enc_act_func])
    assert all([af in ['sigmoid', 'tanh', 'none'] for af in dae_dec_act_func])

    utilities.random_seed_np_tf(internal_FLAGS.seed)

    # Resolve activation-function names to callables and build the model.
    # (The dead "sdae = None" pre-assignment from the original was removed.)
    dae_enc_act_func = [utilities.str2actfunc(af) for af in dae_enc_act_func]
    dae_dec_act_func = [utilities.str2actfunc(af) for af in dae_dec_act_func]
    finetune_act_func = utilities.str2actfunc(internal_FLAGS.finetune_act_func)

    sdae = stacked_denoising_autoencoder.StackedDenoisingAutoencoder(
        do_pretrain=internal_FLAGS.do_pretrain, name=internal_FLAGS.name,
        layers=dae_layers, finetune_loss_func=internal_FLAGS.finetune_loss_func,
        finetune_learning_rate=internal_FLAGS.finetune_learning_rate,
        finetune_num_epochs=internal_FLAGS.finetune_num_epochs,
        finetune_opt=internal_FLAGS.finetune_opt,
        finetune_batch_size=internal_FLAGS.finetune_batch_size,
        finetune_dropout=internal_FLAGS.finetune_dropout,
        enc_act_func=dae_enc_act_func, dec_act_func=dae_dec_act_func,
        corr_type=dae_corr_type, corr_frac=dae_corr_frac, regcoef=dae_regcoef,
        loss_func=dae_loss_func, opt=dae_opt,
        learning_rate=dae_learning_rate, momentum=internal_FLAGS.momentum,
        num_epochs=dae_num_epochs, batch_size=dae_batch_size,
        finetune_act_func=finetune_act_func)

    # Layer-wise unsupervised pre-training (encoded outputs are not used here).
    if internal_FLAGS.do_pretrain:
        encoded_X, encoded_vX = sdae.pretrain(trX, vlX)

    trY = np.array(trY)
    vlY = np.array(vlY)

    # Supervised fine-tuning of the whole stack.
    sdae.fit(trX, trY, vlX, vlY)
def run_cnn(dataset_dir):
    """Train a convolutional network on the CIFAR-10 dataset.

    :param dataset_dir: directory containing the CIFAR-10 data batches.
    """
    # Seeding with a fixed -1 instead of FLAGS.seed; presumably -1 means
    # "no fixed seed" — TODO confirm in utilities.random_seed_np_tf.
    # utilities.random_seed_np_tf(FLAGS.seed)
    utilities.random_seed_np_tf(-1)
    # common parameters
    cifar_dir = dataset_dir
    num_epochs = 3
    batch_size = 64
    n_classes = 10  # NOTE(review): not referenced anywhere in this function
    # parameters for cnn
    name_cnn = 'cnn'
    original_shape_cnn = '32,32,3'
    layers_cnn = 'conv2d-5-5-32-1,maxpool-2,conv2d-5-5-64-1,maxpool-2,full-1024,softmax'
    loss_func_cnn = 'softmax_cross_entropy'
    opt_cnn = 'adam'
    learning_rate_cnn = 1e-4
    momentum_cnn = 0.5  # not used
    dropout_cnn = 0.5
    batch_norm = True
    # prepare data
    trX, trY, teX, teY = datasets.load_cifar10_dataset(cifar_dir, mode='supervised')
    # due to the memory limit, cannot use the whole training set
    # NOTE(review): trY_non_one_hot is assigned but never read in this function.
    trY_non_one_hot = trY
    trY = np.array(utilities.to_one_hot(trY))
    teY = np.array(teY)
    # Raw (non one-hot) labels of the second half of the test set, sliced
    # BEFORE one-hot encoding — also unused in this function.
    teY_non_one_hot = teY[5000:]
    teY = np.array(utilities.to_one_hot(teY))
    # first half test set is validation set
    vlX = teX[:5000]
    vlY = teY[:5000]
    # Second half stays as the test set; teX/teY are not used again below.
    teX = teX[5000:]
    teY = teY[5000:]
    # define Convolutional Network
    cnn = conv_net.ConvolutionalNetwork(
        original_shape=[int(i) for i in original_shape_cnn.split(',')],
        layers=layers_cnn, name=name_cnn, loss_func=loss_func_cnn,
        num_epochs=num_epochs, batch_size=batch_size,
        opt=opt_cnn, learning_rate=learning_rate_cnn, momentum=momentum_cnn,
        dropout=dropout_cnn, batch_norm = batch_norm
    )
    print('Start Convolutional Network training...')
    cnn.fit(trX, trY, vlX, vlY)  # supervised learning
'Loss function. ["mean_squared" or "softmax_cross_entropy"]') flags.DEFINE_integer('num_epochs', 10, 'Number of epochs.') flags.DEFINE_integer('batch_size', 10, 'Size of each mini-batch.') flags.DEFINE_string('opt', 'gradient_descent', '["gradient_descent", "ada_grad", "momentum", "adam"]') flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.') flags.DEFINE_float('momentum', 0.5, 'Momentum parameter.') flags.DEFINE_float('dropout', 1, 'Dropout parameter.') assert FLAGS.dataset in ['mnist', 'cifar10', 'custom'] assert FLAGS.opt in ['gradient_descent', 'ada_grad', 'momentum', 'adam'] assert FLAGS.loss_func in ['mean_squared', 'softmax_cross_entropy'] if __name__ == '__main__': utilities.random_seed_np_tf(FLAGS.seed) if FLAGS.dataset == 'mnist': # ################# # # MNIST Dataset # # ################# # trX, trY, vlX, vlY, teX, teY = datasets.load_mnist_dataset( mode='supervised') elif FLAGS.dataset == 'cifar10': # ################### # # Cifar10 Dataset # # ################### #
flags.DEFINE_string('model_name', 'rbm_model', 'Name for the model.') flags.DEFINE_integer('verbose', 0, 'Level of verbosity. 0 - silent, 1 - print accuracy.') flags.DEFINE_integer('gibbs_sampling_steps', 1, 'Number of gibbs sampling steps in Contrastive Divergence.') flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.') flags.DEFINE_float('stddev', 0.1, 'Standard deviation for the Gaussian visible units.') flags.DEFINE_integer('num_epochs', 10, 'Number of epochs.') flags.DEFINE_integer('batch_size', 10, 'Size of each mini-batch.') flags.DEFINE_integer('transform_gibbs_sampling_steps', 10, 'Gibbs sampling steps for the transformation of data.') assert FLAGS.dataset in ['mnist', 'cifar10', 'custom'] assert FLAGS.cifar_dir != '' if FLAGS.dataset == 'cifar10' else True assert FLAGS.visible_unit_type in ['bin', 'gauss'] if __name__ == '__main__': utilities.random_seed_np_tf(FLAGS.seed) if FLAGS.dataset == 'mnist': # ################# # # MNIST Dataset # # ################# # trX, vlX, teX = datasets.load_mnist_dataset(mode='unsupervised') width, height = 28, 28 elif FLAGS.dataset == 'cifar10': # ################### # # Cifar10 Dataset # # ################### #
def do_predict(FLAGS, predictions_dir, num):
    """Run SDAE predictions over every .npy file in a hard-coded dataset dir.

    For each ``*.npy`` file found under ``dataset_path`` a fresh
    StackedDenoisingAutoencoder is constructed from FLAGS, the file is
    loaded as the test set, and the float16 predictions are saved under
    ``predictions_dir`` with the same file name.

    :param FLAGS: tensorflow.app.flags.FLAGS with the model hyper-parameters.
        NOTE: this function mutates FLAGS (test_dataset, save_predictions).
    :param predictions_dir: directory prefix where predictions are written.
    :param num: unused by the live code path; only referenced by the
        commented-out index-based variant kept below.
    """
    # NOTE(review): dataset path is hard-coded to one machine's filesystem.
    dataset_path = "/media/files/yp/rbm/yinchuansanqu/dataset/"
    f_list1 = os.listdir(dataset_path)
    for i in f_list1:
        if os.path.splitext(i)[1] == '.npy':
            # Convert the autoencoder layer parameters from flag strings to
            # their specific types. NOTE(review): this parsing plus the model
            # construction below is redone for every file in the directory.
            dae_layers = utilities.flag_to_list(FLAGS.dae_layers, 'int')
            dae_enc_act_func = utilities.flag_to_list(FLAGS.dae_enc_act_func, 'str')
            dae_dec_act_func = utilities.flag_to_list(FLAGS.dae_dec_act_func, 'str')
            dae_opt = utilities.flag_to_list(FLAGS.dae_opt, 'str')
            dae_loss_func = utilities.flag_to_list(FLAGS.dae_loss_func, 'str')
            dae_learning_rate = utilities.flag_to_list(FLAGS.dae_learning_rate, 'float')
            dae_regcoef = utilities.flag_to_list(FLAGS.dae_regcoef, 'float')
            dae_corr_type = utilities.flag_to_list(FLAGS.dae_corr_type, 'str')
            dae_corr_frac = utilities.flag_to_list(FLAGS.dae_corr_frac, 'float')
            dae_num_epochs = utilities.flag_to_list(FLAGS.dae_num_epochs, 'int')
            dae_batch_size = utilities.flag_to_list(FLAGS.dae_batch_size, 'int')

            # Validate the parsed parameters.
            assert all([0. <= cf <= 1. for cf in dae_corr_frac])
            assert all([ct in ['masking', 'salt_and_pepper', 'none'] for ct in dae_corr_type])
            assert FLAGS.dataset in ['mnist', 'cifar10', 'custom']
            assert len(dae_layers) > 0
            assert all([af in ['sigmoid', 'tanh'] for af in dae_enc_act_func])
            assert all([af in ['sigmoid', 'tanh', 'none'] for af in dae_dec_act_func])

            utilities.random_seed_np_tf(FLAGS.seed)

            def load_from_np(dataset_path):
                # Load an ndarray from disk; empty path yields None.
                if dataset_path != '':
                    return np.load(dataset_path)
                else:
                    return None

            # Build activation callables and the network model object.
            sdae = None
            dae_enc_act_func = [utilities.str2actfunc(af) for af in dae_enc_act_func]
            dae_dec_act_func = [utilities.str2actfunc(af) for af in dae_dec_act_func]
            finetune_act_func = utilities.str2actfunc(FLAGS.finetune_act_func)

            sdae = stacked_denoising_autoencoder.StackedDenoisingAutoencoder(
                do_pretrain=FLAGS.do_pretrain, name=FLAGS.name,
                layers=dae_layers, finetune_loss_func=FLAGS.finetune_loss_func,
                finetune_learning_rate=FLAGS.finetune_learning_rate,
                finetune_num_epochs=FLAGS.finetune_num_epochs,
                finetune_opt=FLAGS.finetune_opt,
                finetune_batch_size=FLAGS.finetune_batch_size,
                finetune_dropout=FLAGS.finetune_dropout,
                enc_act_func=dae_enc_act_func, dec_act_func=dae_dec_act_func,
                corr_type=dae_corr_type, corr_frac=dae_corr_frac, regcoef=dae_regcoef,
                loss_func=dae_loss_func, opt=dae_opt,
                learning_rate=dae_learning_rate, momentum=FLAGS.momentum,
                num_epochs=dae_num_epochs, batch_size=dae_batch_size,
                finetune_act_func=finetune_act_func)

            # Unsupervised pre-training.
            # NOTE(review): trX and vlX are NOT defined anywhere in this
            # function — this branch raises NameError if do_pretrain is True.
            if FLAGS.do_pretrain:
                encoded_X, encoded_vX = sdae.pretrain(trX, vlX)

            FLAGS.test_dataset = dataset_path + i
            # FLAGS.test_labels = "/media/files/yp/rbm/pic_div/label/binary/" + str(idx) + ".npy"
            FLAGS.save_predictions = predictions_dir + i
            # teX, teY = load_from_np(FLAGS.test_dataset), load_from_np(FLAGS.test_labels)
            teX = load_from_np(FLAGS.test_dataset)

            # Compute the model's accuracy on the test set (disabled).
            # print('Test set accuracy: {}'.format(sdae.score(teX, teY)))

            # Save the model's predictions (float16 to cut file size).
            if FLAGS.save_predictions:
                print('Saving the predictions for the test set...')
                predict = sdae.predict(teX).astype(np.float16)
                np.save(FLAGS.save_predictions, predict)

    # ---- Historical index-based variant, kept commented out. ----
    # for idx in range(0, num):
    #     # Convert the autoencoder layer parameters to their specific types.
    #     dae_layers = utilities.flag_to_list(FLAGS.dae_layers, 'int')
    #     dae_enc_act_func = utilities.flag_to_list(FLAGS.dae_enc_act_func, 'str')
    #     dae_dec_act_func = utilities.flag_to_list(FLAGS.dae_dec_act_func, 'str')
    #     dae_opt = utilities.flag_to_list(FLAGS.dae_opt, 'str')
    #     dae_loss_func = utilities.flag_to_list(FLAGS.dae_loss_func, 'str')
    #     dae_learning_rate = utilities.flag_to_list(FLAGS.dae_learning_rate, 'float')
    #     dae_regcoef = utilities.flag_to_list(FLAGS.dae_regcoef, 'float')
    #     dae_corr_type = utilities.flag_to_list(FLAGS.dae_corr_type, 'str')
    #     dae_corr_frac = utilities.flag_to_list(FLAGS.dae_corr_frac, 'float')
    #     dae_num_epochs = utilities.flag_to_list(FLAGS.dae_num_epochs, 'int')
    #     dae_batch_size = utilities.flag_to_list(FLAGS.dae_batch_size, 'int')
    #
    #     # Check parameters.
    #     assert all([0. <= cf <= 1. for cf in dae_corr_frac])
    #     assert all([ct in ['masking', 'salt_and_pepper', 'none'] for ct in dae_corr_type])
    #     assert FLAGS.dataset in ['mnist', 'cifar10', 'custom']
    #     assert len(dae_layers) > 0
    #     assert all([af in ['sigmoid', 'tanh'] for af in dae_enc_act_func])
    #     assert all([af in ['sigmoid', 'tanh', 'none'] for af in dae_dec_act_func])
    #
    #     utilities.random_seed_np_tf(FLAGS.seed)
    #
    #     def load_from_np(dataset_path):
    #         if dataset_path != '':
    #             return np.load(dataset_path)
    #         else:
    #             return None
    #
    #     # Create activation functions and the network model object.
    #     sdae = None
    #
    #     dae_enc_act_func = [utilities.str2actfunc(af) for af in dae_enc_act_func]
    #     dae_dec_act_func = [utilities.str2actfunc(af) for af in dae_dec_act_func]
    #     finetune_act_func = utilities.str2actfunc(FLAGS.finetune_act_func)
    #
    #     sdae = stacked_denoising_autoencoder.StackedDenoisingAutoencoder(
    #         do_pretrain=FLAGS.do_pretrain, name=FLAGS.name,
    #         layers=dae_layers, finetune_loss_func=FLAGS.finetune_loss_func,
    #         finetune_learning_rate=FLAGS.finetune_learning_rate, finetune_num_epochs=FLAGS.finetune_num_epochs,
    #         finetune_opt=FLAGS.finetune_opt, finetune_batch_size=FLAGS.finetune_batch_size,
    #         finetune_dropout=FLAGS.finetune_dropout,
    #         enc_act_func=dae_enc_act_func, dec_act_func=dae_dec_act_func,
    #         corr_type=dae_corr_type, corr_frac=dae_corr_frac, regcoef=dae_regcoef,
    #         loss_func=dae_loss_func, opt=dae_opt,
    #         learning_rate=dae_learning_rate, momentum=FLAGS.momentum,
    #         num_epochs=dae_num_epochs, batch_size=dae_batch_size,
    #         finetune_act_func=finetune_act_func)
    #
    #     # Train the model (unsupervised pre-training).
    #     if FLAGS.do_pretrain:
    #         encoded_X, encoded_vX = sdae.pretrain(trX, vlX)
    #
    #     # FLAGS.test_dataset = "/media/files/yp/rbm/pic_div/dataset/test" + str(idx) + "_RGB.npy"
    #     # FLAGS.test_labels = "/media/files/yp/rbm/pic_div/label/binary/" + str(idx) + ".npy"
    #     FLAGS.save_predictions = predictions_dir + str(idx) + ".npy"
    #     # teX, teY = load_from_np(FLAGS.test_dataset), load_from_np(FLAGS.test_labels)
    #     teX = load_from_np(FLAGS.test_dataset)
    #     # Compute the model's accuracy on the test set.
    #     # print('Test set accuracy: {}'.format(sdae.score(teX, teY)))
    #
    #     # Save the model's predictions.
    #     if FLAGS.save_predictions:
    #         print('Saving the predictions for the test set...')
    #         np.save(FLAGS.save_predictions, sdae.predict(teX))
    #
    #     def save_layers_output(which_set):
    #         if which_set == 'test':
    #             teout = sdae.get_layers_output(teX)
    #             for i, o in enumerate(teout):
    #                 np.save(FLAGS.save_layers_output_test + '-layer-' + str(i + 1) + '-test', o)
    #
    #     # Save each layer's output for the test set.
    #     if FLAGS.save_layers_output_test:
    #         print('Saving the output of each layer for the test set')
    #         save_layers_output('test')
    #
    #     # Save each layer's output for the train set.
    #     if FLAGS.save_layers_output_train:
    #         print('Saving the output of each layer for the train set')
    #         save_layers_output('train')
    # NOTE(review): Python-2-style print statement preserved below as found;
    # it would be a syntax error if uncommented under Python 3.
    # print '-------------------------------------------预测过程已经完成---------------------------------------------------'
rbm_layers = [100, 100, 100, 100] rbm_learning_rate = [rbm_1, rbm_2, rbm_3, rbm_4] #rbm_layers = [100, 100, 100, 100, 100, 100, 100, 100, 100, 100] #rbm_learning_rate = [rbm_1, rbm_2, rbm_3, rbm_4, 0.07, 0.07, 0.05, 0.05, 0.05, 0.05] rbm_num_epochs = [200] rbm_batch_size = [10] rbm_gibbs_k = [1] finetune_opt = 'adam' # sgd/adagrad/momentum/adam finetune_loss_func = 'softmax_cross_entropy' # softmax_cross_entropy/mse finetune_dropout = 1 finetune_num_epochs = 1 if __name__ == '__main__': utilities.random_seed_np_tf(2) trX, trY = np.load(sys.argv[1]), np.load(sys.argv[2]) vlX, vlY = np.load(sys.argv[3]), np.load(sys.argv[4]) # Create the object finetune_act_func = utilities.str2actfunc('relu') srbm = dbn.DeepBeliefNetwork( name='dbn', rbm_layers=rbm_layers, finetune_act_func=finetune_act_func, rbm_learning_rate=rbm_learning_rate, rbm_num_epochs=rbm_num_epochs, rbm_gibbs_k=rbm_gibbs_k, rbm_gauss_visible=True,
def generate_feature_sets(dataset_dir, fs_filename, tr_size):
    """Train a denoising autoencoder and a CNN on CIFAR-10, pickling features.

    The output file receives, in this order: the raw (non one-hot) training
    labels, the raw test labels, the DAE features for train and test, then
    the CNN features for train and test.

    :param dataset_dir: directory containing the CIFAR-10 data batches.
    :param fs_filename: path of the pickle file to write the feature sets to.
    :param tr_size: number of training examples to keep (memory limit).
    """
    # Seeding with -1 instead of FLAGS.seed — presumably "no fixed seed";
    # TODO confirm in utilities.random_seed_np_tf.
    # utilities.random_seed_np_tf(FLAGS.seed)
    utilities.random_seed_np_tf(-1)

    # common parameters
    cifar_dir = dataset_dir
    num_epochs = 3
    batch_size = 64
    n_classes = 10  # not referenced below; kept for parity with run_cnn

    # parameters for dae
    name_dae = 'dae'
    n_components_dae = 1024
    enc_act_func_dae = tf.nn.sigmoid
    dec_act_func_dae = tf.nn.sigmoid
    corr_type_dae = 'masking'
    corr_frac_dae = 0.5
    loss_func_dae = 'cross_entropy'
    opt_dae = 'momentum'
    regcoef_dae = 5e-4
    learning_rate_dae = 0.05
    momentum_dae = 0.9

    # parameters for cnn
    name_cnn = 'cnn'
    original_shape_cnn = '32,32,3'
    layers_cnn = 'conv2d-5-5-32-1,maxpool-2,conv2d-5-5-64-1,maxpool-2,full-1024,softmax'
    loss_func_cnn = 'softmax_cross_entropy'
    opt_cnn = 'adam'
    learning_rate_cnn = 1e-4
    momentum_cnn = 0.5  # not used
    dropout_cnn = 0.5

    # loading data
    trX, trY, teX, teY = datasets.load_cifar10_dataset(cifar_dir, mode='supervised')
    # due to the memory limit, cannot use the whole training set
    trX = trX[:tr_size]
    trY = trY[:tr_size]
    trY_non_one_hot = trY
    trY = np.array(utilities.to_one_hot(trY))
    teY = np.array(teY)
    # Raw labels of the second half of the test set (the part kept as the
    # test set below), sliced before one-hot encoding.
    teY_non_one_hot = teY[5000:]
    teY = np.array(utilities.to_one_hot(teY))
    # first half test set is validation set
    vlX = teX[:5000]
    vlY = teY[:5000]
    teX = teX[5000:]
    teY = teY[5000:]

    # Use a context manager so the pickle file is closed even if one of the
    # training runs below raises (the original leaked the handle on error).
    with open(fs_filename, 'wb') as fs_file:
        pickle.dump(trY_non_one_hot, fs_file)
        pickle.dump(teY_non_one_hot, fs_file)

        # define Denoising Autoencoder
        dae = denoising_autoencoder.DenoisingAutoencoder(
            name=name_dae, n_components=n_components_dae,
            enc_act_func=enc_act_func_dae, dec_act_func=dec_act_func_dae,
            corr_type=corr_type_dae, corr_frac=corr_frac_dae,
            loss_func=loss_func_dae, opt=opt_dae, regcoef=regcoef_dae,
            learning_rate=learning_rate_dae, momentum=momentum_dae,
            num_epochs=num_epochs, batch_size=batch_size)

        print('Start Denoising Autoencoder training...')
        dae.fit(trX, trX, vlX, vlX)  # unsupervised learning
        feature_train_set_1 = dae.extract_features(trX)
        pickle.dump(feature_train_set_1, fs_file)
        feature_test_set_1 = dae.extract_features(teX)
        pickle.dump(feature_test_set_1, fs_file)

        # define Convolutional Network
        cnn = conv_net.ConvolutionalNetwork(
            original_shape=[int(i) for i in original_shape_cnn.split(',')],
            layers=layers_cnn, name=name_cnn, loss_func=loss_func_cnn,
            num_epochs=num_epochs, batch_size=batch_size,
            opt=opt_cnn, learning_rate=learning_rate_cnn, momentum=momentum_cnn,
            dropout=dropout_cnn)

        print('Start Convolutional Network training...')
        cnn.fit(trX, trY, vlX, vlY)  # supervised learning
        feature_train_set_2 = cnn.extract_features(trX)
        pickle.dump(feature_train_set_2, fs_file)
        feature_test_set_2 = cnn.extract_features(teX)
        pickle.dump(feature_test_set_2, fs_file)