# Train an ICNN with bundle-entropy inference on the bibsonomy multi-label
# datasets. Assumes module-level imports (argparse, os, shutil, numpy.random
# as npr, tensorflow as tf, setproctitle, bibsonomy) and an ICNN `Model`
# class defined elsewhere in the file.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work/icnn.ebundle')
    parser.add_argument('--nEpoch', type=float, default=50)
    parser.add_argument('--trainBatchSz', type=int, default=128)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--layerSizes', type=int, nargs='+', default=[600, 600])
    parser.add_argument('--dataset', type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')
    parser.add_argument('--valSplit', type=float, default=0)
    parser.add_argument('--inference_nIter', type=int, default=10)
    args = parser.parse_args()

    setproctitle.setproctitle('bamos.icnn.ebundle.{}.{}'.format(
        args.dataset, ','.join(str(x) for x in args.layerSizes)))

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    # Start from a clean save directory.
    save = os.path.expanduser(args.save)
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save)

    if args.dataset == 'bibtex':
        data = bibsonomy.loadBibtex("data/bibtex")
    elif args.dataset == 'bookmarks':
        data = bibsonomy.loadBookmarks("data/bookmarks")
    elif args.dataset == 'delicious':
        data = bibsonomy.loadDelicious("data/delicious")
    else:
        assert False

    # with open('work-ff/best.logits.pkl', 'rb') as f:
    #     data = pkl.load(f)

    nTest = data['testX'].shape[0]
    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nXy = nFeatures + nLabels

    # Optionally hold out a validation split from the training set.
    nTrain_orig = data['trainX'].shape[0]
    nVal = int(args.valSplit * nTrain_orig)
    nTrain = nTrain_orig - nVal
    if args.valSplit > 0:
        I = npr.permutation(nTrain_orig)
        trainI, valI = I[:nTrain], I[nTrain:]
        trainX = data['trainX'][trainI, :]
        trainY = data['trainY'][trainI, :]
        valX = data['trainX'][valI, :]
        valY = data['trainY'][valI, :]
    else:
        trainX = data['trainX']
        trainY = data['trainY']

    print("\n\n" + "=" * 40)
    print("+ nTrain: {}, nTest: {}".format(nTrain, nTest))
    print("+ nFeatures: {}, nLabels: {}".format(nFeatures, nLabels))
    print("=" * 40 + "\n\n")

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, args.layerSizes, sess)
        # Evaluate on the held-out validation split if one was requested,
        # otherwise on the test set.
        if args.valSplit > 0:
            model.train(args, trainX, trainY, valX, valY)
        else:
            model.train(args, trainX, trainY, data['testX'], data['testY'])
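# The training script above presumably ends with a standard entry point; a
# minimal sketch with an illustrative invocation. The module name
# `icnn_ebundle` is taken from the comparison script below, and the flag
# values are examples only:
#
#   python icnn_ebundle.py --dataset bibtex --layerSizes 600 600 --nEpoch 50
if __name__ == '__main__':
    main()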
# Compare bundle-entropy inference against projected gradient descent (PGD)
# on a trained ICNN checkpoint. Assumes module-level imports (argparse, os,
# time, numpy as np, numpy.random as npr, tensorflow as tf, setproctitle,
# matplotlib.pyplot as plt, bibsonomy, bundle_entropy, bamos_opt,
# icnn_ebundle) and an `entr` helper (see the sketch below).
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('chkpt', type=str)
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--layerSizes', type=int, nargs='+', default=[600, 600])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--dataset', type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')
    args = parser.parse_args()

    setproctitle.setproctitle('bamos.icnn.ebundle-vs-gd.{}.{}'.format(
        args.dataset, ','.join(str(x) for x in args.layerSizes)))

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    if args.dataset == 'bibtex':
        data = bibsonomy.loadBibtex("data/bibtex")
    elif args.dataset == 'bookmarks':
        data = bibsonomy.loadBookmarks("data/bookmarks")
    elif args.dataset == 'delicious':
        data = bibsonomy.loadDelicious("data/delicious")
    else:
        assert False

    nTest = data['testX'].shape[0]
    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nXy = nFeatures + nLabels
    nTrain = data['trainX'].shape[0]
    trainX = data['trainX']
    trainY = data['trainY']

    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = icnn_ebundle.Model(nFeatures, nLabels, args.layerSizes, sess)
        model.load(args.chkpt)

        nSamples = 10

        # Bundle Entropy
        bundleIter, bundleTime, bundleEs = [], [], []

        def fg(yhats):
            # Energy and gradient of the un-regularized energy E(x, y).
            fd = {model.x_: xBatch, model.y_: yhats}
            e, ge = model.sess.run([model.E_, model.dE_dy_], feed_dict=fd)
            return e, ge

        def cb(iterNum, es, x):
            bundleIter.append(iterNum)
            print(np.mean(es))
            # Subtract the binary entropy so the values are comparable to the
            # entropy-regularized objective PGD minimizes below.
            es_entr = es - entr(x)
            bundleEs.append(np.mean(es_entr))
            bundleTime.append(time.time() - start)

        start = time.time()
        I = npr.randint(nTrain, size=nSamples)
        xBatch = trainX[I, :]
        yBatch = trainY[I, :]
        y0 = np.full(yBatch.shape, 0.5)
        yN, G, h, lam, ys, nIters = bundle_entropy.solveBatch(
            fg, y0, nIter=10, callback=cb)

        # PGD
        pgdIter, pgdTime, pgdEs = {}, {}, {}

        def fg(yhats):
            # Energy and gradient of the entropy-regularized energy.
            fd = {model.x_: xBatch, model.y_: yhats}
            e, ge = model.sess.run([model.E_entr_, model.dE_entr_dy_],
                                   feed_dict=fd)
            return e, ge

        def proj(x):
            # Project back onto the (clipped) unit box.
            return np.clip(x, 1e-6, 1. - 1e-6)

        lrs = [0.1, 0.01, 0.001]
        for lr in lrs:
            pgdIter[lr] = []
            pgdTime[lr] = []
            pgdEs[lr] = []

            def cb(iterNum, es, gs, bestM):
                pgdIter[lr].append(iterNum)
                pgdEs[lr].append(np.mean(es))
                pgdTime[lr].append(time.time() - start)

            start = time.time()
            y0 = np.full(yBatch.shape, 0.5)
            bamos_opt.pgd.solve_batch(fg, proj, y0, lr=lr, rollingDecay=0.5,
                                      eps=1e-3, minIter=10, maxIter=10,
                                      callback=cb)

        # Plot the objective traces for both inference methods.
        fig, ax = plt.subplots(1, 1)
        plt.xlabel('Iteration')
        plt.ylabel('Entropy-Scaled Objective')
        for lr in lrs:
            plt.plot(pgdIter[lr], pgdEs[lr], label='PGD, lr={}'.format(lr))
        plt.plot(bundleIter, bundleEs, label='Bundle Entropy',
                 color='k', linestyle='dashed')
        plt.legend()
        ax.set_yscale('log')
        for ext in ['png', 'pdf']:
            fname = os.path.join(args.save, 'obj.' + ext)
            plt.savefig(fname)
            print("Created {}".format(fname))
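# The bundle-entropy callback above references an `entr` helper that is not
# part of this excerpt. A minimal sketch, assuming it returns the binary
# entropy summed over the label dimension so that `es - entr(x)` matches the
# entropy-regularized objective; the clipping constant is illustrative:
import numpy as np

def entr(x):
    """H(x) = -sum_i [x_i*log(x_i) + (1-x_i)*log(1-x_i)] for each row of x."""
    z = np.clip(x, 1e-6, 1. - 1e-6)  # avoid log(0) at the box boundary
    return -np.sum(z * np.log(z) + (1. - z) * np.log(1. - z), axis=1)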
# Feedforward baseline: fully-connected ReLU layers with batch normalization
# and a sigmoid output layer, trained with a binary cross-entropy loss.
# Assumes module-level imports (argparse, os, csv, json, numpy as np,
# numpy.random as npr, tensorflow as tf, setproctitle, tflearn, an
# initializer module imported as `tfi`, bibsonomy, util).
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work-ff')
    parser.add_argument('--nEpoch', type=float, default=100)
    parser.add_argument('--trainBatchSz', type=int, default=128)
    parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--layerSizes', type=int, nargs='+', default=[600])
    # parser.add_argument('--saveFeatures', action='store_true')
    parser.add_argument('--dataset', type=str,
                        choices=['bibtex', 'bookmarks', 'delicious'],
                        default='bibtex')
    parser.add_argument('--valSplit', type=float, default=0)
    args = parser.parse_args()

    assert args.valSplit < 1.

    setproctitle.setproctitle('bamos.ff.{}.{}'.format(
        args.dataset, ','.join(str(x) for x in args.layerSizes)))

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    save = os.path.expanduser(args.save)
    if not os.path.isdir(save):
        os.makedirs(save, exist_ok=True)

    if args.dataset == 'bibtex':
        data = bibsonomy.loadBibtex("data/bibtex")
    elif args.dataset == 'bookmarks':
        data = bibsonomy.loadBookmarks("data/bookmarks")
    elif args.dataset == 'delicious':
        data = bibsonomy.loadDelicious("data/delicious")
    else:
        assert False

    nFeatures = data['trainX'].shape[1]
    nLabels = data['trainY'].shape[1]
    nXy = nFeatures + nLabels

    x_ = tf.placeholder(tf.float32, shape=(None, nFeatures), name='x')
    y_ = tf.placeholder(tf.float32, shape=(None, nLabels), name='y')
    net = x_

    # Optionally hold out a validation split from the training set.
    nTrain_orig = data['trainX'].shape[0]
    nVal = int(args.valSplit * nTrain_orig)
    nTrain = nTrain_orig - nVal
    if args.valSplit > 0:
        I = npr.permutation(nTrain_orig)
        trainI, valI = I[:nTrain], I[nTrain:]
        trainX = data['trainX'][trainI, :]
        trainY = data['trainY'][trainI, :]
        valX = data['trainX'][valI, :]
        valY = data['trainY'][valI, :]
        testDf = tflearn.data_flow.FeedDictFlow(
            {x_: valX, y_: valY},
            tf.train.Coordinator(), batch_size=args.testBatchSz)
    else:
        trainX = data['trainX']
        trainY = data['trainY']
        testDf = tflearn.data_flow.FeedDictFlow(
            {x_: data['testX'], y_: data['testY']},
            tf.train.Coordinator(), batch_size=args.testBatchSz)

    # nTrain = data['trainX'].shape[0]
    nTest = data['testX'].shape[0]

    print("\n\n" + "=" * 40)
    print("+ nTrain: {}, nVal: {}, nTest: {}".format(nTrain, nVal, nTest))
    print("+ nFeatures: {}, nLabels: {}".format(nFeatures, nLabels))
    print("=" * 40 + "\n\n")

    nIter = int(args.nEpoch * np.ceil(nTrain / args.trainBatchSz))
    print(nIter)

    with tf.variable_scope("FeedForward") as scope:
        # Hidden layers: fully-connected + ReLU + batch norm.
        for sz in args.layerSizes:
            std = 1.0 / np.sqrt(sz)
            net = tflearn.fully_connected(
                net, sz, activation='relu', weight_decay=0,
                weights_init=tfi.uniform(None, -std, std),
                bias_init=tfi.uniform(None, -std, std))
            net = tflearn.layers.normalization.batch_normalization(net)
        features_ = net

        # Linear output layer followed by a sigmoid for multi-label outputs.
        sz = nLabels
        std = 1.0 / np.sqrt(sz)
        logits_ = tflearn.fully_connected(
            net, sz, activation='linear', weight_decay=0,
            weights_init=tfi.uniform(None, -std, std),
            bias_init=tfi.uniform(None, -std, std))
        yhat_ = tf.sigmoid(logits_)

    ff_vars = tf.all_variables()

    # loss_ = tf.reduce_mean(tf.square(y_ - yhat_))
    # Binary cross-entropy summed over all labels and examples.
    loss_ = -tf.reduce_sum(y_ * tf.log(yhat_ + 1e-10)) \
            - tf.reduce_sum((1. - y_) * tf.log(1. - yhat_ + 1e-10))

    train_step = tf.train.AdamOptimizer(0.001).minimize(loss_)

    trainFields = ['iter', 'f1', 'loss']
    trainF = open(os.path.join(save, 'train.csv'), 'w')
    trainW = csv.writer(trainF)
    trainW.writerow(trainFields)

    testFields = ['iter', 'f1', 'loss']
    testF = open(os.path.join(save, 'test.csv'), 'w')
    testW = csv.writer(testF)
    testW.writerow(testFields)

    meta = {'nEpoch': args.nEpoch, 'nIter': nIter, 'nTrain': nTrain,
            'nVal': nVal, 'nTest': nTest, 'trainBatchSz': args.trainBatchSz,
            'layerSizes': args.layerSizes}

    ff_saver = tf.train.Saver(ff_vars, max_to_keep=1)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        graphWriter = tf.train.SummaryWriter(os.path.join(save, 'graph'),
                                             sess.graph)
        sess.run(tf.initialize_all_variables())

        nParams = np.sum(v.get_shape().num_elements()
                         for v in tf.trainable_variables())
        meta['nParams'] = nParams
        with open(os.path.join(args.save, 'meta.json'), 'w') as f:
            json.dump(meta, f, indent=2)

        bestTestF1 = 0
        for i in range(nIter):
            tflearn.is_training(True)
            I = npr.randint(nTrain, size=args.trainBatchSz)
            xBatch = trainX[I, :]
            yBatch = trainY[I, :]

            _, loss, yPred = sess.run(
                [train_step, loss_, yhat_],
                feed_dict={x_: xBatch, y_: yBatch})
            trainF1 = util.macroF1(yBatch, yPred)
            trainW.writerow((i, trainF1, loss))
            trainF.flush()

            # Print training statistics once per epoch.
            if (i + 1) % np.ceil((nTrain / 1.0) / args.trainBatchSz) == 0:
                print("=== Iteration {} (Epoch {:.2f}) ===".format(
                    i, i / np.ceil(nTrain / args.trainBatchSz)))
                print(" + train F1: {:0.4f}".format(trainF1))
                print(" + train loss: {:0.2e}".format(loss))

            # Evaluate on the test set once per epoch.
            if (i + 1) % np.ceil(nTrain / args.trainBatchSz) == 0:
                tflearn.is_training(False)
                # testF1, testLoss = tfh.trainer.evaluate_flow(
                #     sess, [F1_, loss_], testDf)[0]
                yPred, testLoss = sess.run(
                    [yhat_, loss_],
                    feed_dict={x_: data['testX'], y_: data['testY']})
                testF1 = util.macroF1(data['testY'], yPred)
                print(" + testF1: {:0.4f}".format(testF1))
                print(" + testLoss: {:0.2e}".format(testLoss))
                testW.writerow((i, testF1, testLoss))
                testF.flush()
                # os.system('./icnn.plot.py ' + args.save)

                if testF1 > bestTestF1:
                    bestTestF1 = testF1
                    # print(" + Saving new best model.")
                    # ff_saver.save(sess, os.path.join(args.save, 'best.tf'))
                    # trainL = sess.run(logits_, feed_dict={x_: data['trainX']})
                    # testL = sess.run(logits_, feed_dict={x_: data['testX']})
                    # p = os.path.join(args.save, 'best.logits.pkl')
                    # print(" + Writing logits to: ", p)
                    # featuresData = {'trainX': trainL, 'trainY': data['trainY'],
                    #                 'testX': testL, 'testY': data['testY']}
                    # with open(p, 'wb') as f:
                    #     pkl.dump(featuresData, f)

        trainF.close()
        testF.close()

        with open(os.path.join(args.save, 'meta.json'), 'w') as f:
            json.dump(meta, f, indent=2)

        os.system('./icnn.plot.py ' + args.save)
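# `util.macroF1` is used above for both training and test evaluation but is
# not part of this excerpt. A minimal sketch of one common definition,
# assuming predictions are thresholded at 0.5 and F1 is computed per example
# and then averaged; the actual util.py may use a different threshold or
# averaging scheme:
import numpy as np

def macroF1(trueY, predY, threshold=0.5):
    """Example-averaged F1 for multi-label predictions in [0, 1]."""
    hardPred = (predY >= threshold).astype(np.float64)
    tp = np.sum(trueY * hardPred, axis=1)
    precision = tp / np.maximum(np.sum(hardPred, axis=1), 1e-12)
    recall = tp / np.maximum(np.sum(trueY, axis=1), 1e-12)
    f1 = 2. * precision * recall / np.maximum(precision + recall, 1e-12)
    return np.mean(f1)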