def _load_data_from_dataset(self, data_dir, dataset, state_filter=("success", ), multi_cate=False):
    """Load one dataset split and keep only the games with an accepted status.

    Args:
        data_dir: Directory handed to ``get_games``.
        dataset: Name of the split handed to ``get_games``.
        state_filter: Collection of statuses; a game is kept when its
            ``status`` is a member of this collection.
        multi_cate: Accepted for signature compatibility; not used here.

    The surviving games are stored on ``self.games``.
    """
    kept = []
    for game in get_games(data_dir, dataset):
        if game.status in state_filter:
            kept.append(game)
    self.games = kept
def clear_data_for_new_pictures(self):
    """Return the "test" games with their dialogue state wiped.

    The ``multi_cate`` flag is read from ``self.question.config`` (defaulting
    to ``False``) and forwarded to ``get_games``. Every returned game has its
    ``questions``, ``answers`` and ``q_cates`` replaced by fresh empty lists
    and its ``status`` set to ``"incomplete"``.
    """
    use_multi_cate = self.question.config.get("multi_cate", False)
    blank_games = get_games(self.data_dir, "test", use_multi_cate)
    for game in blank_games:
        # Each game gets its own new list objects; nothing is shared.
        game.questions = []
        game.answers = []
        game.q_cates = []
        game.status = "incomplete"
    return blank_games
def clear_data_for_new_objects(self):
    """Return the first 20000 "train" games with their dialogue state wiped.

    The ``multi_cate`` flag is read from ``self.question.config`` (defaulting
    to ``False``) and forwarded to ``get_games``. Every returned game has its
    ``questions``, ``answers`` and ``q_cates`` replaced by fresh empty lists
    and its ``status`` set to ``"incomplete"``.

    NOTE: picking a random target object per game (reassigning
    ``object_id`` from ``objects``) was sketched here but is disabled, so each
    game keeps the ``object_id`` it was loaded with.
    """
    use_multi_cate = self.question.config.get("multi_cate", False)
    all_train_games = get_games(self.data_dir, "train", use_multi_cate)
    blank_games = all_train_games[:20000]
    for game in blank_games:
        # Each game gets its own new list objects; nothing is shared.
        game.questions = []
        game.answers = []
        game.q_cates = []
        game.status = "incomplete"
    return blank_games
def _load_data_from_dataset(self, data_dir, dataset, state_filter=("success", ), multi_cate=False):
    """Load one dataset split and expand it to one sample per question.

    For every game returned by ``get_games`` and every aligned
    (question id, question, answer) triple in it, a shallow copy of the game
    is made whose ``questions`` holds that single question, whose
    ``question_ids`` holds that single id and whose ``answers`` holds the
    integer label of the answer ('yes' -> 0, 'no' -> 1, 'n/a' -> 2,
    matched case-insensitively).

    Args:
        data_dir: Directory handed to ``get_games``.
        dataset: Name of the split handed to ``get_games``.
        state_filter: Accepted for signature compatibility; not used here.
        multi_cate: Accepted for signature compatibility; not used here.

    Raises:
        KeyError: If an answer is not one of the three known labels.

    The samples are stored on ``self.games``.
    """
    label_of = {'yes': 0, 'no': 1, 'n/a': 2}
    loaded = get_games(data_dir, dataset)

    # Bind the attribute first so samples accumulate on self.games directly.
    self.games = samples = []
    for dialogue in loaded:
        turns = zip(dialogue.question_ids, dialogue.questions, dialogue.answers)
        for question_id, question, answer in turns:
            # Shallow copy: the other game attributes are shared with the source.
            sample = copy.copy(dialogue)
            sample.questions = question
            sample.answers = label_of[answer.lower()]
            sample.question_ids = question_id
            samples.append(sample)
def _load_data_from_dataset(self, data_dir, dataset, state_filter=("success",), multi_cate=False):
    """Load one dataset split and expand each dialogue into its prefixes.

    Games are first restricted to those whose ``status`` is in
    ``state_filter``. Then, for a game with N questions, N shallow copies are
    produced; copy ``i`` (0-based) carries:

    * ``questions``: questions 0..i (inclusive),
    * ``q_cates``:   categories 0..i (inclusive),
    * ``answers``:   answers 0..i-1 (the answer to question ``i`` is left out).

    Args:
        data_dir: Directory handed to ``get_games``.
        dataset: Name of the split handed to ``get_games``.
        state_filter: Collection of accepted game statuses.
        multi_cate: Forwarded to ``get_games``.

    The samples are stored on ``self.games``.
    """
    # Apply the data filter, based on game status.
    accepted = [
        game
        for game in get_games(data_dir, dataset, multi_cate)
        if game.status in state_filter
    ]

    # Preprocess games: split every dialogue into one sample per turn.
    self.games = samples = []
    for dialogue in accepted:
        for turn in range(len(dialogue.questions)):
            sample = copy.copy(dialogue)
            # Index explicitly (rather than slicing) so that mismatched
            # list lengths still surface as an IndexError.
            sample.questions = [dialogue.questions[k] for k in range(turn + 1)]
            sample.answers = [dialogue.answers[k] for k in range(turn)]
            sample.q_cates = [dialogue.q_cates[k] for k in range(turn + 1)]
            samples.append(sample)
def __init__(self, data_dir, dataset, option, qgen_args, oracle_args, guesser_args, tokenizer):
    """Store the model arguments, build the feature providers and load games.

    Args:
        data_dir: Root data directory; features are looked up under
            ``<data_dir>/features``.
        dataset: Name of the split handed to ``get_games``.
        option: When equal to ``"test"``, only the first 20000 successful
            games are kept.
        qgen_args: Stored on ``self.qgen_args``.
        oracle_args: Stored on ``self.oracle_args``.
        guesser_args: Stored on ``self.guesser_args``.
        tokenizer: Stored on ``self.tokenizer``.
    """
    self.tokenizer = tokenizer
    self.qgen_args = qgen_args
    self.oracle_args = oracle_args
    self.guesser_args = guesser_args

    features_root = os.path.join(data_dir, "features")
    vgg_fc8_dir = os.path.join(features_root, "vgg16", "fc8")
    rcnn_dir = os.path.join(features_root, "rcnn")

    # Providers are created in the order image -> crop -> object.
    self.image_builder = ImageProvider(
        os.path.join(vgg_fc8_dir, "image.pkl"), "feature", "vgg16")
    self.crop_builder = ImageProvider(
        os.path.join(vgg_fc8_dir, "crop.pkl"), "feature", "vgg16")
    self.object_builder = ImageProvider(
        os.path.join(rcnn_dir, "size,rcnn_arch,224.txt"), "file", "rcnn")

    # get_games is always called with True as its third argument here.
    successful = [
        game
        for game in get_games(data_dir, dataset, True)
        if game.status == "success"
    ]
    self.games = successful[:20000] if option == "test" else successful
'<padding>': 0, '<start>': 1, '<stop>': 2, '<stop_dialogue>': 3, '<unk>': 4, '<yes>': 5, '<no>': 6, '<n/a>': 7, } word2occ = collections.defaultdict(int) tknzr = TweetTokenizer(preserve_case=False) print("Processing train dataset...") trainset = get_games(args.data_dir, "train") for game in trainset: for question in game.questions: tokens = tknzr.tokenize(question) for tok in tokens: word2occ[tok] += 1 print("filter words...") for word, occ in word2occ.items(): if occ >= args.min_occ and word.count('.') <= 1: word2i[word] = len(word2i) print("Number of words (occ >= 1): {}".format(len(word2occ))) print("Number of words (occ >= {}): {}".format(args.min_occ, len(word2i))) dict_path = os.path.join(args.data_dir, 'dict.json')
model1_file = os.path.join(loop_dir, "enc-dec-vgg.json") model1_games = load_json_file(model1_file) model3_file = os.path.join(loop_dir, "hred-rcnn.json") model3_games = load_json_file(model3_file) model7_file = os.path.join(loop_dir, "hred-va.json") model7_games = load_json_file(model7_file) for i in range(15, 20): if model1_games[i].__str__() != model7_games[i].__str__(): plot_game(model1_games[i], model_name="Enc-Dec-VGG") plot_game(model3_games[i], model_name="Hred-RCNN") plot_game(model7_games[i], model_name="Hred-VA") else: print(model1_games[i].__str__()) print(model7_games[i].__str__()) """ games=get_games(data_dir, 'train') for game in games: if game.img.id == 427135: plot_game(game) games=get_games(data_dir, 'valid') for game in games: if game.img.id == 427135: plot_game(game) games=get_games(data_dir, 'test') for game in games: if game.img.id == 427135: plot_game(game)