def save_plots(in_dir, out_dir, name, ignore_incomplete):
    stat_logger = create_logger(out_dir, name)
    dataset = Dataset(in_dir, name)

    # Optionally drop games that neither succeeded nor failed (e.g. aborted games)
    if ignore_incomplete:
        dataset.games = [
            g for g in dataset.games
            if g.status == "success" or g.status == "failure"
        ]

    for prototype in prototypes:
        p = prototype(out_dir, dataset.games, stat_logger, name)
        p.save_as_pdf()
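
# A minimal usage sketch for save_plots. The CLI wiring below is hypothetical:
# the argument names and the argparse setup are assumptions, not part of the
# original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-in_dir", type=str, required=True, help="Directory containing the dataset")
    parser.add_argument("-out_dir", type=str, required=True, help="Directory for the generated PDF plots")
    parser.add_argument("-name", type=str, required=True, help="Dataset split to plot (e.g. train/valid/test)")
    parser.add_argument("-ignore_incomplete", action="store_true", help="Drop unfinished games before plotting")
    args = parser.parse_args()

    save_plots(args.in_dir, args.out_dir, args.name, args.ignore_incomplete)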
image_builder, crop_builder = None, None

# Load image
use_resnet = False
if 'image' in config['model']:
    logger.info('Loading images..')
    image_builder = get_img_builder(config['model']['image'], args.img_dir)
    use_resnet = image_builder.is_raw_image()
    # Guard: image inputs are not supported for the Guesser yet
    assert False, "Guesser + Image is not yet available"

# Load data
logger.info('Loading data..')
trainset = Dataset(args.data_dir, "train", image_builder, crop_builder)
validset = Dataset(args.data_dir, "valid", image_builder, crop_builder)
testset = Dataset(args.data_dir, "test", image_builder, crop_builder)

# Load dictionary
logger.info('Loading dictionary..')
tokenizer = GWTokenizer(os.path.join(args.data_dir, args.dict_file))

# Build Network
logger.info('Building network..')
network = GuesserNetwork(config['model'], num_words=tokenizer.no_words)

# Build Optimizer
logger.info('Building optimizer..')
optimizer, outputs = create_optimizer(network, config["optimizer"])
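
# For orientation, a hedged sketch of the config fragment this script reads.
# The keys below are inferred from the accesses above; every concrete value is
# an illustrative assumption, not the project's actual default configuration.
example_config = {
    "model": {
        "image": {},        # presence of this key triggers the (unsupported) image branch
    },
    "optimizer": {
        "batch_size": 64,   # assumed field; create_optimizer consumes config["optimizer"]
    },
}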
logger.info('Loading images..')
image_builder = get_img_builder(config['model']['image'], args.img_dir)
use_resnet = image_builder.is_raw_image()

if config["model"]['inputs'].get('crop', False):
    logger.info('Loading crops..')
    crop_builder = get_img_builder(config['model']['crop'], args.crop_dir, is_crop=True)
    use_resnet = crop_builder.is_raw_image()

use_resnet |= image_builder.is_raw_image()
# use_process is assumed to be initialised (to False) earlier in the script
use_process |= image_builder.require_multiprocess()

# Load data
logger.info('Loading data..')
trainset = Dataset(args.data_dir, "train", image_builder, crop_builder, rcnn, args.no_games_to_load)
validset = Dataset(args.data_dir, "valid", image_builder, crop_builder, rcnn, args.no_games_to_load)
# NB: the test split is loaded from a hardcoded absolute path
testset = Dataset_visg("/home/xzp/guesswhat_v2/data/nag2.json", image_builder, crop_builder, rcnn, args.no_games_to_load)

# Load dictionary
logger.info('Loading dictionary..')
tokenizer = GWTokenizer(args.dict_file)

# Load glove
glove = None
# if config["model"]["question"]['glove']:
#     logger.info('Loading glove..')
#     glove = GloveEmbeddings(args.glove_file)
parser.add_argument("-name", type=str, help="Output directory", required=True) parser.add_argument("-normalize", type=bool, help="normalize word representation", required=True) parser.add_argument("-ignore_incomplete", type=bool, default=True, help="Ignore incomplete games in the dataset") args = parser.parse_args() dataset = Dataset(args.data_dir, args.name) games = [ g for g in dataset.games if g.status == "success" or g.status == "failure" ] N = len(games) data = np.zeros((5, 5)) questions = [] for game in games: questions.append(game.questions) questions = list(itertools.chain(*questions)) # Do the tfidf
logger.info('Loading images..')
image_builder = get_img_builder(config['model']['image'], args.img_dir)
use_resnet = image_builder.is_raw_image()
use_process |= image_builder.require_multiprocess()

if config["model"]['inputs'].get('crop', False):
    logger.info('Loading crops..')
    crop_builder = get_img_builder(config['model']['crop'], args.crop_dir, is_crop=True)
    use_resnet |= crop_builder.is_raw_image()  # |= so the image flag is not overwritten
    use_process |= crop_builder.require_multiprocess()

# Load data
logger.info('Loading data..')
trainset = Dataset(args.data_dir, "train", image_builder, crop_builder, False, args.no_games_to_load)
validset = Dataset(args.data_dir, "valid", image_builder, crop_builder, False, args.no_games_to_load)
testset = Dataset(args.data_dir, "test", image_builder, crop_builder, False, args.no_games_to_load)

# Load dictionary
logger.info('Loading dictionary..')
tokenizer = GWTokenizer(args.dict_file)

# Load glove
glove = None
if config["model"]["question"]['glove']:
    logger.info('Loading glove..')
    glove = GloveEmbeddings(args.glove_file)
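
# Hedged sketch of how use_process is typically consumed downstream. This
# pattern is assumed (it is not part of the snippet above): a CPU pool is only
# created when one of the builders needs multiprocess preprocessing.
from multiprocessing import Pool

cpu_pool = Pool(processes=4) if use_process else None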
    k_best=loop_config['loop']['beam_k_best'])

looper_evaluator = BasicLooper(loop_config,
                               oracle_wrapper=oracle_wrapper,
                               guesser_wrapper=guesser_wrapper,
                               qgen_wrapper=qgen_wrapper,
                               tokenizer=tokenizer,
                               batch_size=loop_config["optimizer"]["batch_size"])

# Compute the initial scores
logger.info(">>>-------------- INITIAL SCORE ---------------------<<<")
for split in ["nd_test", "nd_valid", "od_test", "od_valid"]:
    logger.info("Loading dataset split {}".format(split))
    testset = Dataset(args.data_dir, split, "guesswhat_nocaps", image_builder, crop_builder)

    logger.info(">>> New Games <<<")
    dump_suffix = "gameplay_{}_{}".format(split, "rl" if args.rl_identifier else "sl")
    compute_qgen_accuracy(sess, testset,
                          batchifier=eval_batchifier,
                          evaluator=looper_evaluator,
                          tokenizer=tokenizer,
                          mode=mode_to_evaluate,
                          save_path=save_path,
                          cpu_pool=cpu_pool,
                          batch_size=batch_size,
                          store_games=args.store_games,
                          dump_suffix=dump_suffix)
# Load all networks configs
logger = logging.getLogger()

###############################
#  LOAD DATA
###############################

# Load image
logger.info('Loading images..')
image_builder = _create_image_builder_rcnn()
crop_builder = None

# Load data
logger.info('Loading data..')
# trainset = Dataset(args.data_dir, "train", image_builder, crop_builder)
validset = Dataset(args.data_dir, "valid", image_builder, crop_builder, True, 10)
# testset = Dataset(args.data_dir, "test", image_builder, crop_builder)

dataset = validset
dataset.games = validset.games
# dataset, dummy_dataset = trainset, validset
# dataset.games = trainset.games + validset.games + testset.games
# dummy_dataset.games = []

# hack dataset to only keep one game by image
image_id_set = {}
games = []
for game in dataset.games:
    if game.image.id not in image_id_set:
        games.append(game)
        image_id_set[game.image.id] = 1
'<padding>': 0, '<start>': 1, '<stop>': 2, # '<stop_dialogue>': 3, '<unk>': 3, '<yes>': 4, '<no>': 5, '<n/a>': 6, } word2occ = collections.defaultdict(int) tknzr = TweetTokenizer(preserve_case=False) print("Processing train dataset...") trainset = Dataset(args.data_dir, "train") games = trainset.get_data() for game in games: question = game.questions[0] tokens = tknzr.tokenize(question) for tok in tokens: word2occ[tok] += 1 print("filter words...") for word, occ in word2occ.items(): if occ >= args.min_occ and word.count('.') <= 1: word2i[word] = len(word2i) print("Number of words (occ >= 1): {}".format(len(word2occ))) print("Number of words (occ >= {}): {}".format(args.min_occ, len(word2i)))