예제 #1
0
 def _load_data_from_dataset(
         self, data_dir, dataset, state_filter=("success", ),
         multi_cate=False):
     """Load one dataset split and keep only games with an accepted status.

     Args:
         data_dir: Passed to ``get_games`` as its first argument.
         dataset: Passed to ``get_games`` as its second argument.
         state_filter: Container of status values; a game is retained when
             its ``status`` is a member of this container.
         multi_cate: Accepted by the signature but not used by this body.

     Side effects:
         Assigns the retained games, in their loaded order, to
         ``self.games`` as a list.
     """
     kept = []
     for candidate in get_games(data_dir, dataset):
         if candidate.status in state_filter:
             kept.append(candidate)
     self.games = kept
예제 #2
0
 def clear_data_for_new_pictures(self):
     """Load the "test" games and reset the dialogue state of each one.

     The ``multi_cate`` flag is read from ``self.question.config``
     (defaulting to ``False``) and forwarded to ``get_games``.

     Returns:
         The loaded games, each with empty ``questions``, ``answers`` and
         ``q_cates`` lists and ``status`` set to ``"incomplete"``.
     """
     use_multi_cate = self.question.config.get("multi_cate", False)
     fresh_games = get_games(self.data_dir, "test", use_multi_cate)
     for entry in fresh_games:
         # Each game receives its own new list objects.
         entry.questions, entry.answers, entry.q_cates = [], [], []
         entry.status = "incomplete"
     return fresh_games
예제 #3
0
 def clear_data_for_new_objects(self):
     """Load the first 20000 "train" games and reset their dialogue state.

     The ``multi_cate`` flag is read from ``self.question.config``
     (defaulting to ``False``) and forwarded to ``get_games``.

     Returns:
         The truncated collection of games, each with empty ``questions``,
         ``answers`` and ``q_cates`` lists and ``status`` set to
         ``"incomplete"``.
     """
     use_multi_cate = self.question.config.get("multi_cate", False)
     all_train_games = get_games(self.data_dir, "train", use_multi_cate)
     fresh_games = all_train_games[:20000]
     for entry in fresh_games:
         # Each game receives its own new list objects.
         entry.questions, entry.answers, entry.q_cates = [], [], []
         entry.status = "incomplete"
         # NOTE: object_id is left untouched here; code that assigned a
         # randomly chosen object to each game was previously disabled.
     return fresh_games
예제 #4
0
 def _load_data_from_dataset(
         self, data_dir, dataset, state_filter=("success", ),
         multi_cate=False):
     """Expand every loaded game into one sample per question/answer turn.

     Each turn of a game (taken in lockstep from ``question_ids``,
     ``questions`` and ``answers``) yields a shallow copy of the game whose
     ``questions`` is that single question, whose ``question_ids`` is that
     single id, and whose ``answers`` is the integer code of the
     lower-cased answer (``yes`` -> 0, ``no`` -> 1, ``n/a`` -> 2).

     Args:
         data_dir: Passed to ``get_games`` as its first argument.
         dataset: Passed to ``get_games`` as its second argument.
         state_filter: Accepted by the signature but not used by this body.
         multi_cate: Accepted by the signature but not used by this body.

     Raises:
         KeyError: If a lower-cased answer is not ``yes``, ``no`` or ``n/a``.

     Side effects:
         Replaces ``self.games`` with the list of per-turn samples.
     """
     answer_codes = {'yes': 0, 'no': 1, 'n/a': 2}
     self.games = []
     for dialogue in get_games(data_dir, dataset):
         turns = zip(dialogue.question_ids, dialogue.questions,
                     dialogue.answers)
         for turn_id, question, answer in turns:
             code = answer_codes[answer.lower()]
             # Shallow copy: the sample shares all other attributes with
             # the game it was derived from.
             sample = copy.copy(dialogue)
             sample.question_ids = turn_id
             sample.questions = question
             sample.answers = code
             self.games.append(sample)
예제 #5
0
    def _load_data_from_dataset(self, data_dir, dataset, state_filter=("success",), multi_cate=False):
        """Split every accepted dialogue into one sample per question.

        For question index ``i`` of a game, the sample is a shallow copy of
        the game holding questions ``0..i``, question categories ``0..i``
        and answers ``0..i-1`` (the answer to question ``i`` is left out).

        Args:
            data_dir: Passed to ``get_games`` as its first argument.
            dataset: Passed to ``get_games`` as its second argument.
            state_filter: Container of status values; only games whose
                ``status`` is a member are expanded.
            multi_cate: Passed to ``get_games`` as its third argument.

        Side effects:
            Replaces ``self.games`` with the list of generated samples.
        """
        # Apply the status filter to the loaded games.
        accepted = [
            game for game in get_games(data_dir, dataset, multi_cate)
            if game.status in state_filter
        ]
        # Pre-process games: split each dialogue into several samples.
        self.games = []
        for dialogue in accepted:
            for turn in range(len(dialogue.questions)):
                through_turn = range(turn + 1)
                sample = copy.copy(dialogue)
                # New lists are built so the copy does not share them with
                # the source game.
                sample.questions = [dialogue.questions[k] for k in through_turn]
                sample.answers = [dialogue.answers[k] for k in range(turn)]
                sample.q_cates = [dialogue.q_cates[k] for k in through_turn]
                self.games.append(sample)
예제 #6
0
    def __init__(self, data_dir, dataset, option, qgen_args, oracle_args,
                 guesser_args, tokenizer):
        """Store the model arguments, build the providers and load the games.

        Args:
            data_dir: Root directory; feature files are looked up under its
                ``features`` sub-directory and it is also passed to
                ``get_games``.
            dataset: Passed to ``get_games`` as its second argument.
            option: When equal to ``"test"``, only the first 20000
                successful games are kept.
            qgen_args: Stored unchanged on ``self.qgen_args``.
            oracle_args: Stored unchanged on ``self.oracle_args``.
            guesser_args: Stored unchanged on ``self.guesser_args``.
            tokenizer: Stored unchanged on ``self.tokenizer``.
        """
        self.tokenizer = tokenizer
        self.qgen_args, self.oracle_args, self.guesser_args = (
            qgen_args, oracle_args, guesser_args)

        feature_root = os.path.join(data_dir, "features")
        vgg_fc8_dir = os.path.join(feature_root, "vgg16", "fc8")

        # Whole-image and cropped-object providers read from the same
        # vgg16/fc8 directory.
        self.image_builder = ImageProvider(
            os.path.join(vgg_fc8_dir, "image.pkl"), "feature", "vgg16")
        self.crop_builder = ImageProvider(
            os.path.join(vgg_fc8_dir, "crop.pkl"), "feature", "vgg16")

        rcnn_listing = os.path.join(feature_root, "rcnn",
                                    "size,rcnn_arch,224.txt")
        self.object_builder = ImageProvider(rcnn_listing, "file", "rcnn")

        # get_games is called with True as its third argument; only games
        # whose status is "success" are retained.
        successful = [
            game for game in get_games(data_dir, dataset, True)
            if game.status == "success"
        ]
        self.games = successful[:20000] if option == "test" else successful
예제 #7
0
        '<padding>': 0,
        '<start>': 1,
        '<stop>': 2,
        '<stop_dialogue>': 3,
        '<unk>': 4,
        '<yes>': 5,
        '<no>': 6,
        '<n/a>': 7,
    }

    word2occ = collections.defaultdict(int)

    tknzr = TweetTokenizer(preserve_case=False)

    print("Processing train dataset...")
    trainset = get_games(args.data_dir, "train")
    for game in trainset:
        for question in game.questions:
            tokens = tknzr.tokenize(question)
            for tok in tokens:
                word2occ[tok] += 1

    print("filter words...")
    for word, occ in word2occ.items():
        if occ >= args.min_occ and word.count('.') <= 1:
            word2i[word] = len(word2i)

    print("Number of words (occ >= 1): {}".format(len(word2occ)))
    print("Number of words (occ >= {}): {}".format(args.min_occ, len(word2i)))

    dict_path = os.path.join(args.data_dir, 'dict.json')
예제 #8
0
model1_file = os.path.join(loop_dir, "enc-dec-vgg.json")
model1_games = load_json_file(model1_file)
model3_file = os.path.join(loop_dir, "hred-rcnn.json")
model3_games = load_json_file(model3_file)
model7_file = os.path.join(loop_dir, "hred-va.json")
model7_games = load_json_file(model7_file)

for i in range(15, 20):
    if model1_games[i].__str__() != model7_games[i].__str__():
        plot_game(model1_games[i], model_name="Enc-Dec-VGG")
        plot_game(model3_games[i], model_name="Hred-RCNN")
        plot_game(model7_games[i], model_name="Hred-VA")
    else:
        print(model1_games[i].__str__())
        print(model7_games[i].__str__())
"""

# Walk the train, valid and test splits in that order and plot every game
# whose image id is 427135.
for split in ('train', 'valid', 'test'):
    games = get_games(data_dir, split)
    for game in games:
        if game.img.id == 427135:
            plot_game(game)