Beispiel #1
0
    # Load the pre-trained GloVe file: one word per line, followed by its
    # embedding coefficients, all space-separated.
    embeddings = {}
    with io.open(args.glove_in, 'r', encoding="utf-8") as glove_file:
        for row in glove_file:
            tokens = row.rstrip().split(' ')
            embeddings[tokens[0]] = [float(v) for v in tokens[1:]]

    print("Mapping glove...")
    # Keep only the embeddings of words that actually occur in the questions;
    # record out-of-vocabulary words separately for inspection.
    found = {}
    missing = {}
    for dataset in [trainset, validset, testdevset, testset]:
        for game in dataset.games:
            for word in tokenizer.tokenize(game.question):
                word = word.lower().replace("'s", "")
                if word in embeddings:
                    found[word] = embeddings[word]
                else:
                    missing[word] = 1

    print("Number of glove: {}".format(len(found)))
    print("Number of words with no glove: {}".format(len(missing)))

    # Dump the uncovered vocabulary so it can be eyeballed.
    for unknown in missing:
        print(unknown)

    print("Dumping file...")
    pickle_dump(found, args.glove_out)

    print("Done!")
Beispiel #2
0
                                      shuffle=False)
            # Evaluate on the validation split with the same graph outputs
            # used for training.
            valid_loss, valid_accuracy = evaluator.process(sess,
                                                           valid_iterator,
                                                           outputs=outputs)

            logger.info("Training loss: {}".format(train_loss))
            logger.info("Training error: {}".format(1 - train_accuracy))
            logger.info("Validation loss: {}".format(valid_loss))
            logger.info("Validation error: {}".format(1 - valid_accuracy))

            # Checkpoint whenever validation improves.
            # NOTE(review): despite the "_err" suffix, best_val_err and
            # best_train_err hold ACCURACIES (higher is better) — the `>`
            # comparison is consistent with that, but the names are
            # misleading; consider renaming to best_val_acc/best_train_acc.
            if valid_accuracy > best_val_err:
                best_train_err = train_accuracy
                best_val_err = valid_accuracy
                saver.save(sess, save_path.format('params.ckpt'))
                logger.info("Guesser checkpoint saved...")

                # Record which epoch produced the best checkpoint.
                pickle_dump({'epoch': t}, save_path.format('status.pkl'))

        # Load early stopping
        # Restore the best (early-stopped) parameters before measuring
        # final test performance.
        saver.restore(sess, save_path.format('params.ckpt'))
        test_iterator = Iterator(testset,
                                 pool=cpu_pool,
                                 batch_size=batch_size,
                                 batchifier=batchifier,
                                 shuffle=True)
        # NOTE(review): shuffle=True on the test iterator — aggregate
        # loss/accuracy should not depend on order, but shuffle=False is
        # the conventional choice for evaluation; confirm intent.
        [test_loss, test_accuracy] = evaluator.process(sess, test_iterator,
                                                       outputs)

        logger.info("Testing loss: {}".format(test_loss))
        logger.info("Testing error: {}".format(1 - test_accuracy))
Beispiel #3
0
        print("Load dataset -> set: {}".format(one_set))
        dataset = OracleDataset.load(args.data_dir, one_set, image_loader=image_loader, crop_loader=crop_loader)
        batchifier = OracleBatchifier(tokenizer=None, sources=[source])
        iterator = Iterator(dataset,
                            batch_size=args.batch_size,
                            pool=cpu_pool,
                            batchifier=batchifier)

        # Run the CNN over each batch and collect per-game features.
        for batch in tqdm(iterator):
            feat = sess.run(end_points[feature_name], feed_dict={images: numpy.array(batch[source])})
            for f, game in zip(feat, batch["raw"]):
                f = f.squeeze()

                # Features are keyed by object id in crop mode, by picture
                # id otherwise. Renamed from `id` to avoid shadowing the
                # builtin.
                if args.mode == "crop":
                    feat_id = game.object_id
                else:
                    feat_id = game.picture.id

                if args.network == "resnet":
                    # BUG FIX: the original passed x="features", saving the
                    # literal string instead of the feature array `f`.
                    np.savez_compressed(os.path.join(out_dir, "{}.npz".format(feat_id)), x=f)
                else:
                    # VGG features are accumulated in memory and pickled
                    # in one file after the loop.
                    features[feat_id] = f

# ResNet features were written per-game above; VGG features were
# accumulated in memory and are pickled here in a single file.
is_vgg = args.network == "vgg"
if is_vgg:
    print("Dump file...")
    pickle_dump(features, out_dir)

print("Done!")