Example #1
def compute_qgen_accuracy(sess, dataset, batchifier, evaluator, mode,
                          tokenizer, save_path, cpu_pool, batch_size,
                          store_games, dump_suffix):

    logger = logging.getLogger()

    for m in mode:
        if m != "beam_search":
            test_iterator = Iterator(dataset,
                                     pool=cpu_pool,
                                     batch_size=batch_size,
                                     batchifier=batchifier,
                                     shuffle=False,
                                     use_padding=True)
            test_score = evaluator.process(sess,
                                           test_iterator,
                                           mode=m,
                                           store_games=store_games)

            # Retrieve the generated games and dump them as a dataset
            if store_games:
                generated_dialogues = evaluator.get_storage()
                dump_samples_into_dataset(generated_dialogues,
                                          save_path=save_path,
                                          tokenizer=tokenizer,
                                          name=dump_suffix + "." + m)

            logger.info("Accuracy ({} - {}): {}".format(
                dataset.set, m, test_score))
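
For context, a minimal call sketch; the surrounding objects (testset, eval_batchifier, looper_evaluator, tokenizer, cpu_pool, save_path) are assumed to exist in the caller's scope, and the mode list and batch size are illustrative rather than taken from the source:

    compute_qgen_accuracy(sess, testset,
                          batchifier=eval_batchifier,
                          evaluator=looper_evaluator,
                          mode=["sampling", "greedy"],
                          tokenizer=tokenizer,
                          save_path=save_path,
                          cpu_pool=cpu_pool,
                          batch_size=64,
                          store_games=True,
                          dump_suffix="test")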
Example #2
def test_qgen(sess, testset, tokenizer, qgen, cpu_pool, batch_size, logger):
    qgen_sources = qgen.get_sources(sess)
    qgen_evaluator = Evaluator(qgen_sources, qgen.scope_name, network=qgen, tokenizer=tokenizer)
    qgen_batchifier = QuestionerBatchifier(tokenizer, qgen_sources, status=('success',))
    qgen_iterator = Iterator(testset, pool=cpu_pool,
                             batch_size=batch_size,
                             batchifier=qgen_batchifier)
    [qgen_loss] = qgen_evaluator.process(sess, qgen_iterator, outputs=[qgen.ml_loss])
    logger.info("QGen test loss: {}".format(qgen_loss))
Example #3
def test_guesser(sess, testset, tokenizer, guesser, cpu_pool, batch_size, logger):
    guesser_sources = guesser.get_sources(sess)
    guesser_evaluator = Evaluator(guesser_sources, guesser.scope_name, network=guesser, tokenizer=tokenizer)
    guesser_batchifier = QuestionerBatchifier(tokenizer, guesser_sources, status=('success',))
    guesser_iterator = Iterator(testset, pool=cpu_pool,
                                batch_size=batch_size,
                                batchifier=guesser_batchifier)
    [guesser_loss, guesser_error] = guesser_evaluator.process(sess, guesser_iterator, [guesser.loss, guesser.error])
    logger.info("Guesser test loss: {}".format(guesser_loss))
    logger.info("Guesser test error: {}".format(guesser_error))
Example #4
def test_oracle(sess, testset, tokenizer, oracle, cpu_pool, batch_size, logger):

    oracle_dataset = OracleDataset(testset)
    oracle_sources = oracle.get_sources(sess)
    oracle_evaluator = Evaluator(oracle_sources, oracle.scope_name, network=oracle, tokenizer=tokenizer)
    oracle_batchifier = OracleBatchifier(tokenizer, oracle_sources, status=('success',))
    oracle_iterator = Iterator(oracle_dataset, pool=cpu_pool,
                               batch_size=batch_size,
                               batchifier=oracle_batchifier)
    [oracle_loss, oracle_error] = oracle_evaluator.process(sess, oracle_iterator, [oracle.loss, oracle.error])

    logger.info("Oracle test loss: {}".format(oracle_loss))
    logger.info("Oracle test error: {}".format(oracle_error))
Example #5
        # create training tools
        evaluator = Evaluator(sources,
                              network.scope_name,
                              network=network,
                              tokenizer=tokenizer)
        batchifier = QuestionerBatchifier(tokenizer,
                                          sources,
                                          status=('success', ))

        for t in range(start_epoch, no_epoch):
            logger.info('Epoch {}..'.format(t + 1))

            train_iterator = Iterator(trainset,
                                      batch_size=batch_size,
                                      pool=cpu_pool,
                                      batchifier=batchifier,
                                      shuffle=True)
            train_loss, train_accuracy = evaluator.process(sess,
                                                           train_iterator,
                                                           outputs=outputs +
                                                           [optimizer])

            valid_iterator = Iterator(validset,
                                      pool=cpu_pool,
                                      batch_size=batch_size * 2,
                                      batchifier=batchifier,
                                      shuffle=False)
            valid_loss, valid_accuracy = evaluator.process(sess,
                                                           valid_iterator,
                                                           outputs=outputs)
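
A common extension to this loop, sketched under the assumption that a tf.train.Saver and a save_dir exist in the surrounding script, is to checkpoint whenever the validation metric improves:

    best_val_accuracy = 0.
    saver = tf.train.Saver()

    # inside the epoch loop, after valid_loss/valid_accuracy are computed:
    if valid_accuracy > best_val_accuracy:
        best_val_accuracy = valid_accuracy
        saver.save(sess, os.path.join(save_dir, "model.ckpt"))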
Example #6
def extract_raw(
    image_shape,
    dataset_cstor,
    dataset_args,
    batchifier_cstor,
    source_name,
    out_dir,
    set_type,
    no_threads,
):

    for one_set in set_type:

        ############################
        #   LOAD DATASET
        ############################

        print("Load dataset...")
        dataset_args["which_set"] = one_set
        dataset = dataset_cstor(**dataset_args)

        # hack the dataset to keep only one game per image
        image_id_set = {}
        games = []
        for game in dataset.games:
            if game.image.id not in image_id_set:
                games.append(game)
                image_id_set[game.image.id] = 1

        dataset.games = games
        no_images = len(games)

        # prepare batch builder
        dummy_tokenizer = DummyTokenizer()
        batchifier = batchifier_cstor(tokenizer=dummy_tokenizer,
                                      sources=[source_name])
        cpu_pool = Pool(no_threads, maxtasksperchild=1000)
        iterator = Iterator(dataset,
                            batch_size=64,
                            pool=cpu_pool,
                            batchifier=batchifier)

        filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))
        with h5py.File(filepath, 'w') as f:

            feat_dataset = f.create_dataset('features',
                                            shape=[no_images] + image_shape,
                                            dtype=np.float32)
            idx2img = f.create_dataset('idx2img',
                                       shape=[no_images],
                                       dtype=np.int32)
            pt_hd5 = 0

            for batch in tqdm(iterator):

                # Store dataset
                batch_size = len(batch["raw"])
                feat_dataset[pt_hd5:pt_hd5 + batch_size] = batch[source_name]

                # Store idx to image.id
                for i, game in enumerate(batch["raw"]):
                    idx2img[pt_hd5 + i] = game.image.id

                # update hd5 pointer
                pt_hd5 += batch_size

            print("Start dumping file: {}".format(filepath))
        print("Finished dumping file: {}".format(filepath))

    print("Done!")
Example #7

def extract_features(
        img_input,
        ft_output,
        network_ckpt, 
        dataset_cstor,
        dataset_args,
        batchifier_cstor,
        out_dir,
        set_type,
        batch_size,
        no_threads,
        gpu_ratio):

    # CPU/GPU option
    cpu_pool = Pool(no_threads, maxtasksperchild=1000)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_ratio)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, network_ckpt)
    
        for one_set in set_type:
    
            print("Load dataset -> set: {}".format(one_set))
            dataset_args["which_set"] = one_set
            dataset = dataset_cstor(**dataset_args)
    
            # hack the dataset to keep only one game per image
            image_id_set = {}
            games = []
            for game in dataset.games:
                if game.image.id not in image_id_set:
                    games.append(game)
                    image_id_set[game.image.id] = 1

            dataset.games = games
            no_images = len(games)
    
            source_name = os.path.basename(img_input.name[:-2])
            dummy_tokenizer = DummyTokenizer()
            batchifier = batchifier_cstor(tokenizer=dummy_tokenizer, sources=[source_name])
            iterator = Iterator(dataset,
                                batch_size=batch_size,
                                pool=cpu_pool,
                                batchifier=batchifier)
    
            ############################
            #  CREATE FEATURES
            ############################
            print("Start computing image features...")
            filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))
            with h5py.File(filepath, 'w') as f:

                ft_shape = [int(dim) for dim in ft_output.get_shape()[1:]]
                ft_dataset = f.create_dataset('features', shape=[no_images] + ft_shape, dtype=np.float32)
                idx2img = f.create_dataset('idx2img', shape=[no_images], dtype=np.int32)
                pt_hd5 = 0
    
                for batch in tqdm(iterator):
                    feat = sess.run(ft_output, feed_dict={img_input: numpy.array(batch[source_name])})
    
                    # Store dataset
                    batch_size = len(batch["raw"])
                    ft_dataset[pt_hd5: pt_hd5 + batch_size] = feat
    
                    # Store idx to image.id
                    for i, game in enumerate(batch["raw"]):
                        idx2img[pt_hd5 + i] = game.image.id
    
                    # update hd5 pointer
                    pt_hd5 += batch_size
                print("Start dumping file: {}".format(filepath))
            print("Finished dumping file: {}".format(filepath))
    
    
    print("Done!")
Example #8
# CPU/GPU option
cpu_pool = Pool(args.no_thread, maxtasksperchild=1000)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_ratio)

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True)) as sess:
    saver = tf.train.Saver()
    saver.restore(sess, args.ckpt)

    features = dict()
    for one_set in args.set_type:

        print("Load dataset -> set: {}".format(one_set))
        dataset = OracleDataset.load(args.data_dir, one_set, image_loader=image_loader, crop_loader=crop_loader)
        batchifier = OracleBatchifier(tokenizer=None, sources=[source])
        iterator = Iterator(dataset,
                            batch_size=args.batch_size,
                            pool=cpu_pool,
                            batchifier=batchifier)

        for batch in tqdm(iterator):
            feat = sess.run(end_points[feature_name], feed_dict={images: numpy.array(batch[source])})
            for f, game in zip(feat, batch["raw"]):
                f = f.squeeze()

                if args.mode == "crop":
                    id = game.object_id
                else:
                    id = game.picture.id

                if args.network == "resnet":
                    np.savez_compressed(os.path.join(out_dir, "{}.npz".format(id)), x=f)  # store the squeezed feature array under key "x"
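
Each per-game archive can then be reloaded with np.load; the array sits under the key "x":

    feat = np.load(os.path.join(out_dir, "{}.npz".format(id)))["x"]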
Example #9

        evaluator = Evaluator(loop_sources, qgen_network.scope_name, network=qgen_network, tokenizer=tokenizer)

        train_batchifier = LooperBatchifier(tokenizer, loop_sources, train=True)
        eval_batchifier = LooperBatchifier(tokenizer, loop_sources, train=False)

        # Initialize the looper to eval/train the game-simulation
        qgen_network.build_sampling_graph(qgen_config["model"], tokenizer=tokenizer, max_length=loop_config['loop']['max_depth'])
        looper_evaluator = BasicLooper(loop_config,
                                       oracle=oracle_network,
                                       guesser=guesser_network,
                                       qgen=qgen_network,
                                       tokenizer=tokenizer)

        test_iterator = Iterator(testset, pool=cpu_pool,
                                 batch_size=batch_size,
                                 batchifier=eval_batchifier,
                                 shuffle=False,
                                 use_padding=True)
        test_score = looper_evaluator.process(sess, test_iterator, mode="sampling")
        logger.info("Test success ratio (Init-Sampling): {}".format(test_score))

        logs = []
        # Start training
        final_val_score = 0.
        for epoch in range(no_epoch):

            logger.info("Epoch {}/{}".format(epoch, no_epoch))

            train_iterator = Iterator(trainset, batch_size=batch_size,
                                      pool=cpu_pool,
                                      batchifier=train_batchifier,
                                      shuffle=True)
Example #10
def extract_features(img_input, ft_output, network_ckpt, dataset_cstor,
                     dataset_args, batchifier_cstor, out_dir, set_type,
                     batch_size, no_threads, gpu_ratio):

    # CPU/GPU option
    cpu_pool = Pool(no_threads, maxtasksperchild=1000)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_ratio)

    # gpu_options.allow_growth = True

    saver = tf.train.Saver()

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        saver.restore(sess, network_ckpt)

        for one_set in set_type:
            print("Load dataset -> set: {}".format(one_set))
            dataset_args["which_set"] = one_set
            dataset = dataset_cstor(**dataset_args)

            # hack the dataset to keep only one game per image
            print("[Data length] = {}".format(len(dataset.games)))
            image_id_set = {}
            games = []
            for game in dataset.games:
                if game.image.id not in image_id_set:
                    games.append(game)
                    image_id_set[game.image.id] = 1

            #         img = game.image.url.split("/")[-1]
            #         img = img + ".jpg"
            #
            #         print("[Img]= {}".format(img))
            #
            #         s = open(os.path.join("data/img/raw/", "42.jpg"))
            #
            #         try:
            #             print(str(img++".jpg"))
            #             f = open(os.path.join("data/img/raw/", img))
            #
            #             nb_total_trouve += 1
            #         except:
            #             # print("Not Found !!")
            #             nb_nonTrouve += 1
            # print("[Trouve = {} et Non_Trouve = {} ]".format(nb_total_trouve,nb_nonTrouve))
            # # print(img ++".jpg")

            dataset.games = games
            no_images = len(games)

            source_name = os.path.basename(img_input.name[:-2])
            dummy_tokenizer = DummyTokenizer()
            batchifier = batchifier_cstor(tokenizer_question=dummy_tokenizer,
                                          sources=[source_name])
            iterator = Iterator(dataset,
                                batch_size=batch_size,
                                pool=cpu_pool,
                                batchifier=batchifier)

            ############################
            #  CREATE FEATURES
            ############################

            print("Start computing image features...")
            filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))

            with h5py.File(filepath, 'w') as f:
                print("--- 1")
                ft_shape = [int(dim) for dim in ft_output.get_shape()[1:]]
                print("--- 2")
                ft_dataset = f.create_dataset('features',
                                              shape=[no_images] + ft_shape,
                                              dtype=np.float32)
                print("--- 3")
                idx2img = f.create_dataset('idx2img',
                                           shape=[no_images],
                                           dtype=np.int32)
                print("--- 4")
                pt_hd5 = 0
                print("--- 5")

                for batch in tqdm(iterator):
                    feat = sess.run(
                        ft_output,
                        feed_dict={img_input: numpy.array(batch[source_name])})

                    # Store the features
                    batch_size = len(batch["raw"])
                    ft_dataset[pt_hd5:pt_hd5 + batch_size] = feat

                    # Store idx to image.id
                    for i, game in enumerate(batch["raw"]):
                        idx2img[pt_hd5 + i] = game.image.id

                    # update hd5 pointer
                    pt_hd5 += batch_size
                print("Start dumping file: {}".format(filepath))
            print("Finished dumping file: {}".format(filepath))

    print("Done!")
Example #11
from clevr.data_provider.clevr_dataset import CLEVRDataset
from clevr.data_provider.clevr_batchifier import CLEVRBatchifier

if __name__ == "__main__":

    feat_dir = "/media/datas2/tmp"
    data_dir = "/home/sequel/fstrub/clevr_data"

    image_builder = h5FeatureBuilder(img_dir=feat_dir, bufferize=False)

    print("Load datasets...")
    dataset = CLEVRDataset(folder=data_dir,
                           which_set="val",
                           image_builder=image_builder)

    cpu_pool = ThreadPool(1)

    dummy_tokenizer = DummyTokenizer()

    batchifier = CLEVRBatchifier(tokenizer=dummy_tokenizer, sources=["image"])
    iterator = Iterator(dataset,
                        batch_size=64,
                        pool=cpu_pool,
                        batchifier=batchifier)

    for batch in tqdm(iterator):
        pass

    print("Done!")

Example #12

        logs = []
        # Start training
        final_val_score = 0.
        for epoch in range(no_epoch):
            if args.skip_training:
                logger.info("Skip training...")
                break
            logger.info("Epoch {}/{}".format(epoch, no_epoch))

            cpu_pool = create_cpu_pool(args.no_thread, use_process=False)

            train_iterator = Iterator(trainset,
                                      batch_size=batch_size,
                                      pool=cpu_pool,
                                      shuffle=True,
                                      batchifier=train_batchifier)

            [train_accuracy, _] = game_engine.process(sess, train_iterator,
                                                      optimizer=optimizer,
                                                      mode="sampling")

            valid_iterator = Iterator(validset, pool=cpu_pool,
                                      batch_size=batch_size,
                                      batchifier=eval_batchifier,
                                      shuffle=False)
            [val_accuracy, games] = game_engine.process(sess, valid_iterator, mode="sampling")

            logger.info("Accuracy (train - sampling) : {}".format(train_accuracy))
            logger.info("Accuracy (valid - sampling) : {}".format(val_accuracy))