def save_plots(in_dir, out_dir, name, ignore_incomplete):

    stat_logger = create_logger(out_dir, name)
    dataset = Dataset(in_dir, name)

    if ignore_incomplete:
        dataset.games = [
            g for g in dataset.games
            if g.status == "success" or g.status == "failure"
        ]
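    # Assumption: 'prototypes' is a module-level list of plot classes, each one
    # constructed as prototype(out_dir, games, logger, name) and exposing save_as_pdf().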

    for prototype in prototypes:
        p = prototype(out_dir, dataset.games, stat_logger, name)
        p.save_as_pdf()
Example #2
    image_builder, crop_builder = None, None

    # Load image
    logger.info('Loading images..')
    use_resnet = False
    if 'image' in config['model']:
        logger.info('Loading images..')
        image_builder = get_img_builder(config['model']['image'], args.img_dir)
        use_resnet = image_builder.is_raw_image()

        assert False, "Guesser + Image is not yet available"

    # Load data
    logger.info('Loading data..')
    trainset = Dataset(args.data_dir, "train", image_builder, crop_builder)
    validset = Dataset(args.data_dir, "valid", image_builder, crop_builder)
    testset = Dataset(args.data_dir, "test", image_builder, crop_builder)

    # Load dictionary
    logger.info('Loading dictionary..')
    tokenizer = GWTokenizer(os.path.join(args.data_dir, args.dict_file))

    # Build Network
    logger.info('Building network..')
    network = GuesserNetwork(config['model'], num_words=tokenizer.no_words)

    # Build Optimizer
    logger.info('Building optimizer..')
    optimizer, outputs = create_optimizer(network, config["optimizer"])
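    # Assumption: 'outputs' holds the tensors (e.g. loss and accuracy) returned by
    # create_optimizer alongside the training op, to be evaluated batch by batch.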
Example #3
        logger.info('Loading images..')
        image_builder = get_img_builder(config['model']['image'], args.img_dir)
        use_resnet = image_builder.is_raw_image()

    if config["model"]['inputs'].get('crop', False):
        logger.info('Loading crops..')
        crop_builder = get_img_builder(config['model']['crop'],
                                       args.crop_dir,
                                       is_crop=True)
        use_resnet = crop_builder.is_raw_image()
        use_resnet |= image_builder.is_raw_image()
        use_process |= image_builder.require_multiprocess()
        # Also account for the crop builder, which may require multiprocessing too.
        use_process |= crop_builder.require_multiprocess()

    # Load data
    logger.info('Loading data..')
    trainset = Dataset(args.data_dir, "train", image_builder, crop_builder,
                       rcnn, args.no_games_to_load)
    validset = Dataset(args.data_dir, "valid", image_builder, crop_builder,
                       rcnn, args.no_games_to_load)
    testset = Dataset_visg("/home/xzp/guesswhat_v2/data/nag2.json",
                           image_builder, crop_builder, rcnn,
                           args.no_games_to_load)

    # Load dictionary
    logger.info('Loading dictionary..')
    tokenizer = GWTokenizer(args.dict_file)

    # Load glove
    glove = None
    # if config["model"]["question"]['glove']:
    #     logger.info('Loading glove..')
    #     glove = GloveEmbeddings(args.glove_file)
Example #4
    parser.add_argument("-name",
                        type=str,
                        help="Output directory",
                        required=True)
    parser.add_argument("-normalize",
                        type=bool,
                        help="normalize word representation",
                        required=True)
    parser.add_argument("-ignore_incomplete",
                        type=bool,
                        default=True,
                        help="Ignore incomplete games in the dataset")

    args = parser.parse_args()

    dataset = Dataset(args.data_dir, args.name)
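    # Keep only games that actually finished, whether successfully or not.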
    games = [
        g for g in dataset.games
        if g.status == "success" or g.status == "failure"
    ]

    N = len(games)

    data = np.zeros((5, 5))

    questions = []
    for game in games:
        questions.append(game.questions)
    questions = list(itertools.chain(*questions))

    # Do the tfidf
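    # A minimal sketch of the TF-IDF step, assuming scikit-learn is available;
    # the original script does not show which library it uses, so TfidfVectorizer
    # and its settings here are illustrative, not the author's implementation.
    from sklearn.feature_extraction.text import TfidfVectorizer

    vectorizer = TfidfVectorizer(lowercase=True)
    tfidf = vectorizer.fit_transform(questions)   # sparse (n_questions, vocab_size) matrix
    vocab = vectorizer.get_feature_names_out()    # column index -> word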
Example #5
        logger.info('Loading images..')
        image_builder = get_img_builder(config['model']['image'], args.img_dir)
        use_resnet = image_builder.is_raw_image()
        use_process |= image_builder.require_multiprocess()

    if config["model"]['inputs'].get('crop', False):
        logger.info('Loading crops..')
        crop_builder = get_img_builder(config['model']['crop'],
                                       args.crop_dir,
                                       is_crop=True)
        use_resnet = crop_builder.is_raw_image()
        use_process |= crop_builder.require_multiprocess()

    # Load data
    logger.info('Loading data..')
    trainset = Dataset(args.data_dir, "train", image_builder, crop_builder,
                       False, args.no_games_to_load)
    validset = Dataset(args.data_dir, "valid", image_builder, crop_builder,
                       False, args.no_games_to_load)
    testset = Dataset(args.data_dir, "test", image_builder, crop_builder,
                      False, args.no_games_to_load)

    # Load dictionary
    logger.info('Loading dictionary..')
    tokenizer = GWTokenizer(args.dict_file)

    # Load glove
    glove = None
    if config["model"]["question"]['glove']:
        logger.info('Loading glove..')
        glove = GloveEmbeddings(args.glove_file)
Example #6
                                   k_best=loop_config['loop']['beam_k_best'])

        looper_evaluator = BasicLooper(
            loop_config,
            oracle_wrapper=oracle_wrapper,
            guesser_wrapper=guesser_wrapper,
            qgen_wrapper=qgen_wrapper,
            tokenizer=tokenizer,
            batch_size=loop_config["optimizer"]["batch_size"])

        # Compute the initial scores
        logger.info(">>>-------------- INITIAL SCORE ---------------------<<<")

        for split in ["nd_test", "nd_valid", "od_test", "od_valid"]:
            logger.info("Loading dataset split {}".format(split))
            testset = Dataset(args.data_dir, split, "guesswhat_nocaps",
                              image_builder, crop_builder)

            logger.info(">>>  New Games  <<<")
            dump_suffix = "gameplay_{}_{}".format(
                split, "rl" if args.rl_identifier else "sl")
            compute_qgen_accuracy(sess,
                                  testset,
                                  batchifier=eval_batchifier,
                                  evaluator=looper_evaluator,
                                  tokenizer=tokenizer,
                                  mode=mode_to_evaluate,
                                  save_path=save_path,
                                  cpu_pool=cpu_pool,
                                  batch_size=batch_size,
                                  store_games=args.store_games,
                                  dump_suffix=dump_suffix)
Example #7
    # Load all network configs
    logger = logging.getLogger()

    ###############################
    #  LOAD DATA
    ###############################

    # Load image
    logger.info('Loading images..')
    image_builder = _create_image_builder_rcnn()
    crop_builder = None

    # Load data
    logger.info('Loading data..')
    # trainset = Dataset(args.data_dir, "train", image_builder, crop_builder)
    validset = Dataset(args.data_dir, "valid", image_builder, crop_builder, True, 10)
    # testset = Dataset(args.data_dir, "test", image_builder, crop_builder)

    dataset = validset
    dataset.games = validset.games
    # dataset, dummy_dataset = trainset, validset
    # dataset.games = trainset.games + validset.games + testset.games
    # dummy_dataset.games = []

    # Hack: keep only one game per image
    image_id_set = set()
    games = []
    for game in dataset.games:
        if game.image.id not in image_id_set:
            games.append(game)
            image_id_set.add(game.image.id)
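    # Assumption: the deduplicated list is meant to replace the dataset's games.
    dataset.games = games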
Example #8
        '<padding>': 0,
        '<start>': 1,
        '<stop>': 2,
        # '<stop_dialogue>': 3,
        '<unk>': 3,
        '<yes>': 4,
        '<no>': 5,
        '<n/a>': 6,
    }

    word2occ = collections.defaultdict(int)

    tknzr = TweetTokenizer(preserve_case=False)

    print("Processing train dataset...")
    trainset = Dataset(args.data_dir, "train")
    games = trainset.get_data()
    for game in games:
        question = game.questions[0]
        tokens = tknzr.tokenize(question)
        for tok in tokens:
            word2occ[tok] += 1

    print("filter words...")
    for word, occ in word2occ.items():
        if occ >= args.min_occ and word.count('.') <= 1:
            word2i[word] = len(word2i)

    print("Number of words (occ >= 1): {}".format(len(word2occ)))
    print("Number of words (occ >= {}): {}".format(args.min_occ, len(word2i)))