    default=None, help='dropout ratio')
parser.add_argument('--dropconnect', type=float, default=None,
                    help='dropconnect ratio')
args = parser.parse_args()

if not os.path.isdir(args.save_path):
    os.mkdir(args.save_path)

X, X_test = mnist_data(args.precision)

rbm = BernoulliRBM(n_visible=784,
                   vb_init=logit_mean(X) if args.sparsity else None,
                   n_hidden=args.n_hidden, precision=args.precision,
                   algorithm=args.algorithm, anneal_lr=args.anneal_lr,
                   n_gibbs_step=args.n_gibbs_steps, learning_rate=args.lr,
                   use_momentum=args.momentum,
                   momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                   max_epoch=args.epochs, batch_size=args.batch_size,
                   regularization=args.regularization, rl_coeff=1e-4,
                   sample_h_state=True, sample_v_state=args.sample_v,
                   save_path=args.save_path, save_after_each_epoch=False,
                   sparsity=args.sparsity, sparsity_cost=1e-4,
                   sparsity_target=0.1, sparsity_damping=0.9,
                   verbose=args.verbose, dropout=args.dropout,
                   dropconnect=args.dropconnect, img_shape=(28, 28))

# Use the previous paper's pretrained weights instead of training from scratch
# (the loading lines must stay enabled, otherwise `weights` is undefined below).
weights_file = '../data/mnist/mnistvh_CD25.mat'
weights = scio.loadmat(weights_file)
rbm._set_weights(weights['vishid'], weights['visbiases'], weights['hidbiases'])

batch = X_test[:100, ]

# logZ, avg_logp = rbm.fit(X, X_test)
# rbm._save_weights('original_sala')

# With few hidden units the partition function is computed exactly;
# otherwise it is estimated.
if args.n_hidden < 30:
    logZ, avg_logp = eval_logp(rbm._sess, True, rbm._w, rbm._vb, rbm._hb,
                               X, X_test, args.precision, 100, 100)
else:
    logZ, avg_logp = eval_logp(rbm._sess, False, rbm._w, rbm._vb, rbm._hb,
                               X, X_test, args.precision, 100, 100)
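# Note: `logit_mean` (used above for `vb_init`) is defined elsewhere in the
# repository. As a minimal sketch of the standard visible-bias initialization
# b_i = log(p_i / (1 - p_i)), where p_i is the mean activation of pixel i;
# the function name, the `eps` clipping constant, and the assumed
# `import numpy as np` are illustrative, not the repository's code:
def logit_mean_sketch(X, eps=1e-5):
    p = np.clip(X.mean(axis=0), eps, 1.0 - eps)  # per-pixel mean activation
    return np.log(p / (1.0 - p))                 # logit of the mean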
def unsupervised_pretrain():
    weights = []
    masks = []

    # Layer 1: 784 -> 500
    g_1 = tf.Graph()
    with g_1.as_default():
        rbm1 = BernoulliRBM(n_visible=784, n_hidden=500, precision='float32',
                            algorithm='CD', anneal_lr=False, learning_rate=0.1,
                            use_momentum=True,
                            momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                            max_epoch=50, batch_size=100, regularization='L2',
                            rl_coeff=1e-4, sample_h_state=True, sample_v_state=True,
                            save_path='/data/experiments/pruning_rbm/mnist/pretraining/',
                            save_after_each_epoch=False, verbose=False)
        # rbm1._load_weights('original_sala')
        print('RBM1:')
        logZ, avg_logp = rbm1.fit(X, X_test_binary, retrain=True)
        print('baseline:logZ:%f, average logp:%f' % (logZ, avg_logp))
        pruning_iter_probability(rbm1, 7, 0.3, X, X_test_binary)
        rbm1._save_weights_mask('classification_w1_pruned_92')
        weights.append(rbm1._sess.run(rbm1._w))
        weights.append(rbm1._sess.run(rbm1._hb))
        masks.append(rbm1._sess.run(rbm1._mask))
        # Hidden representations of layer 1 become the input to layer 2.
        X_1 = rbm1._sess.run(rbm1._h_means_given_v(X_binary))
        X_1_binary = rbm1._sess.run(
            rbm1._sample_h(rbm1._h_means_given_v(X_binary)))
        X_1_test_binary = rbm1._sess.run(
            rbm1._sample_h(rbm1._h_means_given_v(X_test_binary)))
        rbm1._sess.close()

    # Layer 2: 500 -> 500
    g_2 = tf.Graph()
    with g_2.as_default():
        rbm2 = BernoulliRBM(n_visible=500, n_hidden=500, precision='float32',
                            algorithm='CD', anneal_lr=False, learning_rate=0.1,
                            use_momentum=True,
                            momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                            max_epoch=50, batch_size=100, regularization='L2',
                            rl_coeff=1e-4, sample_h_state=True, sample_v_state=True,
                            save_path='/data/experiments/pruning_rbm/mnist/pretraining/',
                            save_after_each_epoch=False, verbose=False)
        print('RBM2:')
        logZ, avg_logp = rbm2.fit(X_1, X_1_test_binary, retrain=True)
        print('baseline:logZ:%f, average logp:%f' % (logZ, avg_logp))
        pruning_iter_probability(rbm2, 7, 0.3, X_1, X_1_test_binary)
        rbm2._save_weights_mask('classification_w2_hinton_pruned_92')
        weights.append(rbm2._sess.run(rbm2._w))
        weights.append(rbm2._sess.run(rbm2._hb))
        masks.append(rbm2._sess.run(rbm2._mask))
        X_2 = rbm2._sess.run(rbm2._h_means_given_v(X_1_binary))
        # Sample hidden (not visible) states, as for the other layers.
        X_2_binary = rbm2._sess.run(
            rbm2._sample_h(rbm2._h_means_given_v(X_1_binary)))
        X_2_test_binary = rbm2._sess.run(
            rbm2._sample_h(rbm2._h_means_given_v(X_1_test_binary)))
        rbm2._sess.close()

    # Layer 3: 500 -> 2000
    g_3 = tf.Graph()
    with g_3.as_default():
        rbm3 = BernoulliRBM(n_visible=500, n_hidden=2000, precision='float32',
                            algorithm='CD', anneal_lr=False, learning_rate=0.1,
                            use_momentum=True,
                            momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                            max_epoch=50, batch_size=100, regularization='L2',
                            rl_coeff=1e-4, sample_h_state=True, sample_v_state=True,
                            save_path='/data/experiments/pruning_rbm/mnist/pretraining/',
                            save_after_each_epoch=False, verbose=False)
        print('RBM3:')
        logZ, avg_logp = rbm3.fit(X_2, X_2_test_binary, retrain=True)
        print('baseline:logZ:%f, average logp:%f' % (logZ, avg_logp))
        pruning_iter_probability(rbm3, 7, 0.3, X_2, X_2_test_binary)
        rbm3._save_weights_mask('classification_w3_pruned_92')
        weights.append(rbm3._sess.run(rbm3._w))
        weights.append(rbm3._sess.run(rbm3._hb))
        masks.append(rbm3._sess.run(rbm3._mask))
        rbm3._sess.close()

    return weights, masks
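# A possible way to consume the pretrained, pruned stack returned above. The
# fine-tuning/classification code lives elsewhere, so the unpacking below is
# only an illustrative assumption: `weights` alternates (W, hidden bias) per
# RBM and `masks` holds one binary pruning mask per layer.
if __name__ == '__main__':
    weights, masks = unsupervised_pretrain()
    w1, hb1, w2, hb2, w3, hb3 = weights
    print('layer weight shapes:', w1.shape, w2.shape, w3.shape)
    print('fraction of surviving connections per layer:',
          [float(m.mean()) for m in masks])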
    action='store_true', help='verbose display of the training process')
args = parser.parse_args()

if not os.path.isdir(args.save_path):
    os.mkdir(args.save_path)

X, labels = load_OCR_letters(mode='train')
X_test, test_labels = load_OCR_letters(mode='test')
X = X.astype(args.precision)
X_test = X_test.astype(args.precision)
batch = X_test[:10]

rbm = BernoulliRBM(n_visible=128, n_hidden=args.n_hidden,
                   precision=args.precision, algorithm=args.algorithm,
                   anneal_lr=args.anneal_lr, n_gibbs_step=args.n_gibbs_steps,
                   learning_rate=args.lr, use_momentum=args.momentum,
                   momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                   max_epoch=args.epochs, batch_size=args.batch_size,
                   regularization=args.regularization, rl_coeff=1e-4,
                   sample_h_state=True, sample_v_state=args.sample_v,
                   save_path=args.save_path, save_after_each_epoch=False,
                   sparsity=args.sparsity, sparsity_cost=1e-4,
                   sparsity_target=0.1, sparsity_damping=0.9,
                   verbose=args.verbose, img_shape=(16, 8))


# Prune in a single aggressive probability step at each target sparsity,
# without retraining.
def pruning_woretrain(rbm, ckpt):
    print('\n\npruning in a single aggressive probability way without retraining\n\n')
    for sparsity in [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
        # Restore the unpruned weights before each pruning level.
        rbm._load_weights(ckpt)
        logZ, avg_logp, achieved_sparsity = rbm.pruning_weight(X, X_test, sparsity)
        print('after pruning, sparsity:%f, logZ:%f, average_logp:%f' %
              (achieved_sparsity, logZ, avg_logp))
        rbm.reset_mask()
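# `rbm.pruning_weight` is implemented in the BernoulliRBM class; as a rough,
# standalone illustration of pruning a weight matrix to a target sparsity with
# a binary mask (an assumption about the general technique, not the
# repository's actual probability-based implementation):
def magnitude_mask_sketch(w, target_sparsity):
    """Return a 0/1 mask that zeroes the `target_sparsity` fraction of the
    smallest-magnitude entries of the NumPy array `w`."""
    threshold = np.quantile(np.abs(w), target_sparsity)
    return (np.abs(w) >= threshold).astype(w.dtype)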
def unsupervised_pretrain():
    weights = []
    masks = []

    # Layer 1: 128 -> 1000
    g_1 = tf.Graph()
    with g_1.as_default():
        rbm1 = BernoulliRBM(n_visible=128, n_hidden=1000, precision='float32',
                            algorithm='CD', anneal_lr=False, learning_rate=0.01,
                            use_momentum=True,
                            momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                            max_epoch=50, batch_size=100, regularization='L2',
                            rl_coeff=1e-4, sample_h_state=True, sample_v_state=True,
                            save_path='/data/experiments/pruning_rbm/ocr_letters/classification/',
                            save_after_each_epoch=False, verbose=False)
        print('RBM1:')
        logZ, avg_logp = rbm1.fit(X, X_test, retrain=True)
        print('baseline:logZ:%f, average logp:%f' % (logZ, avg_logp))
        pruning_iter_probability(rbm1, 4, 0.3, X, X_test)
        weights.append(rbm1._sess.run(rbm1._w))
        weights.append(rbm1._sess.run(rbm1._hb))
        masks.append(rbm1._sess.run(rbm1._mask))
        # Hidden representations of layer 1 become the input to layer 2.
        X_1 = rbm1._sess.run(rbm1._h_means_given_v(X))
        X_1_binary = rbm1._sess.run(rbm1._sample_h(rbm1._h_means_given_v(X)))
        X_1_test_binary = rbm1._sess.run(
            rbm1._sample_h(rbm1._h_means_given_v(X_test)))
        rbm1._sess.close()

    # Layer 2: 1000 -> 1000
    g_2 = tf.Graph()
    with g_2.as_default():
        rbm2 = BernoulliRBM(n_visible=1000, n_hidden=1000, precision='float32',
                            algorithm='CD', anneal_lr=False, learning_rate=0.01,
                            use_momentum=True,
                            momentum=[0.5, 0.5, 0.5, 0.5, 0.5, 0.9],
                            max_epoch=50, batch_size=100, regularization='L2',
                            rl_coeff=1e-4, sample_h_state=True, sample_v_state=True,
                            save_path='/data/experiments/pruning_rbm/ocr_letters/classification/',
                            save_after_each_epoch=False, verbose=False)
        print('RBM2:')
        logZ, avg_logp = rbm2.fit(X_1, X_1_test_binary, retrain=True)
        print('baseline:logZ:%f, average logp:%f' % (logZ, avg_logp))
        pruning_iter_probability(rbm2, 4, 0.3, X_1, X_1_test_binary)
        weights.append(rbm2._sess.run(rbm2._w))
        weights.append(rbm2._sess.run(rbm2._hb))
        masks.append(rbm2._sess.run(rbm2._mask))
        rbm2._sess.close()

    return weights, masks
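# The `_sample_h(_h_means_given_v(...))` pattern above draws binary hidden
# states from their conditional means before feeding them to the next RBM.
# As a standalone NumPy illustration of that Bernoulli sampling step (not the
# class's TensorFlow implementation):
def sample_bernoulli_sketch(probs, rng=np.random):
    # 1 where a uniform draw falls below the activation probability, else 0.
    return (rng.uniform(size=probs.shape) < probs).astype(probs.dtype)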