# Build a reduced GloVe dictionary: collect the vocabulary actually used in
# the question corpus, keep only those vectors while streaming the GloVe
# file (avoids holding every GloVe vector in memory as Python lists), and
# pickle the resulting word -> vector dict.

# First pass over the corpus: gather normalized words, preserving
# first-occurrence order via dict keys.
corpus_words = {}
for _set in [trainset, validset, testdevset, testset]:
    for g in _set.games:
        for w in tokenizer.tokenize(g.question):
            # Normalize: lower-case and strip the possessive suffix.
            corpus_words[w.lower().replace("'s", "")] = None

print("Mapping glove...")

# Stream the GloVe file ("<word> <v1> <v2> ..." per line) and parse vectors
# only for words that occur in the corpus.
glove_dict = {}
with io.open(args.glove_in, 'r', encoding="utf-8") as f:
    for line in f:
        vals = line.rstrip().split(' ')
        if vals[0] in corpus_words:
            glove_dict[vals[0]] = [float(x) for x in vals[1:]]

# Corpus words with no GloVe entry (reported for diagnostics).
not_in_dict = [w for w in corpus_words if w not in glove_dict]

print("Number of glove: {}".format(len(glove_dict)))
print("Number of words with no glove: {}".format(len(not_in_dict)))
for k in not_in_dict:
    print(k)

print("Dumping file...")
pickle_dump(glove_dict, args.glove_out)
print("Done!")
shuffle=False)  # tail of an Iterator(...) call whose opening lies above this chunk

# Evaluate the current parameters on the validation split.
valid_loss, valid_accuracy = evaluator.process(sess, valid_iterator, outputs=outputs)

logger.info("Training loss: {}".format(train_loss))
logger.info("Training error: {}".format(1 - train_accuracy))
logger.info("Validation loss: {}".format(valid_loss))
logger.info("Validation error: {}".format(1 - valid_accuracy))

# Early-stopping bookkeeping: despite the *_err names, these variables hold
# ACCURACIES (higher is better) -- presumably initialised outside this chunk
# so that the first epoch always saves. NOTE(review): confirm the
# initialisation; the naming is misleading.
if valid_accuracy > best_val_err:
    best_train_err = train_accuracy
    best_val_err = valid_accuracy
    # Checkpoint the best-so-far parameters and record the epoch index.
    saver.save(sess, save_path.format('params.ckpt'))
    logger.info("Guesser checkpoint saved...")
    pickle_dump({'epoch': t}, save_path.format('status.pkl'))

# Load early stopping
# Restore the best (early-stopped) checkpoint before the final test pass.
saver.restore(sess, save_path.format('params.ckpt'))

test_iterator = Iterator(testset, pool=cpu_pool,
                         batch_size=batch_size,
                         batchifier=batchifier,
                         shuffle=True)  # NOTE(review): validation used shuffle=False; shuffling should not change aggregate metrics, but confirm intent
[test_loss, test_accuracy] = evaluator.process(sess, test_iterator, outputs)
logger.info("Testing loss: {}".format(test_loss))
logger.info("Testing error: {}".format(1 - test_accuracy))
# Extract visual features for one dataset split with the loaded CNN and
# persist them: one compressed .npz per id for resnet, or a single pickled
# id -> feature dict for vgg.
print("Load dataset -> set: {}".format(one_set))
dataset = OracleDataset.load(args.data_dir, one_set,
                             image_loader=image_loader,
                             crop_loader=crop_loader)
batchifier = OracleBatchifier(tokenizer=None, sources=[source])
iterator = Iterator(dataset,
                    batch_size=args.batch_size,
                    pool=cpu_pool,
                    batchifier=batchifier)

for batch in tqdm(iterator):
    # Forward the batch of images/crops through the network.
    feat = sess.run(end_points[feature_name],
                    feed_dict={images: numpy.array(batch[source])})
    for f, game in zip(feat, batch["raw"]):
        f = f.squeeze()

        # Features are keyed by the object id in crop mode, by the
        # picture id otherwise. ("feat_id" avoids shadowing builtin `id`.)
        if args.mode == "crop":
            feat_id = game.object_id
        else:
            feat_id = game.picture.id

        if args.network == "resnet":
            # BUG FIX: the original passed x="features" (the literal
            # string), so every .npz stored the word "features" instead of
            # the computed feature tensor. Store the array itself.
            np.savez_compressed(os.path.join(out_dir, "{}.npz".format(feat_id)),
                                x=f)
        else:
            features[feat_id] = f

if args.network == "vgg":
    print("Dump file...")
    pickle_dump(features, out_dir)
    print("Done!")