import os

import numpy as np

image_mean_file = './exp_shapes/data/image_mean.npy'

save_dir = './exp_shapes/results/%s/%s.%s' % (exp_name, snapshot_name + '_vis', '_'.join(image_sets))
os.makedirs(save_dir, exist_ok=True)


# Load vocabulary
with open(vocab_shape_file) as f:
    vocab_shape_list = [s.strip() for s in f.readlines()]
vocab_shape_dict = {word: n for n, word in enumerate(vocab_shape_list)}
num_vocab_txt = len(vocab_shape_list)
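
# Minimal sanity check (a sketch, not from the original script): the
# word-to-index dict should exactly invert the index-to-word list.
assert all(vocab_shape_dict[word] == n
           for n, word in enumerate(vocab_shape_list))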

assembler = Assembler(vocab_layout_file)
num_vocab_nmn = len(assembler.module_names)

# Load training data
training_questions = []
training_labels = []
training_images_list = []

for image_set in image_sets:
    with open(training_text_files % image_set) as f:
        training_questions += [l.strip() for l in f.readlines()]
    with open(training_label_files % image_set) as f:
        training_labels += [l.strip() == 'true' for l in f.readlines()]
    training_images_list.append(np.load(training_image_files % image_set))

num_questions = len(training_questions)
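
# Quick consistency check (a sketch; it assumes each .input.npy file stores
# one image per question along its first axis, aligned with the text files):
assert len(training_labels) == num_questions
assert sum(im.shape[0] for im in training_images_list) == num_questions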

# Example 2

import json

import numpy as np

training_text_files = './exp_shapes/shapes_dataset/%s.query_str.txt'
training_image_files = './exp_shapes/shapes_dataset/%s.input.npy'
training_label_files = './exp_shapes/shapes_dataset/%s.output'
training_gt_layout_file = './exp_shapes/data/%s.query_layout_symbols.json'
image_mean_file = './exp_shapes/data/image_mean.npy'

# Load vocabulary
with open(vocab_shape_file) as f:
    vocab_shape_list = [s.strip() for s in f.readlines()]
vocab_shape_dict = {word: n for n, word in enumerate(vocab_shape_list)}
num_vocab_txt = len(vocab_shape_list)

assembler = Assembler(vocab_layout_file)
num_vocab_nmn = len(assembler.module_names)

# Load training data
training_questions = []
training_labels = []
training_images_list = []
gt_layout_list = []

for image_set in image_sets:
    with open(training_text_files % image_set) as f:
        training_questions += [l.strip() for l in f.readlines()]
    with open(training_label_files % image_set) as f:
        training_labels += [l.strip() == 'true' for l in f.readlines()]
    training_images_list.append(np.load(training_image_files % image_set))
    with open(training_gt_layout_file % image_set) as f:
        gt_layout_list += json.load(f)


def Pre(image_sets):
    """Load, shuffle and encode the SHAPES training data for image_sets."""
    training_questions = []
    training_labels = []
    training_images_list = []
    gt_layout_list = []

    for image_set in image_sets:
        with open(training_text_files % image_set) as f:
            training_questions += [l.strip() for l in f.readlines()]
        with open(training_label_files % image_set) as f:
            training_labels += [l.strip() == 'true' for l in f.readlines()]
        training_images_list.append(np.load(training_image_files % image_set))
        with open(training_gt_layout_file % image_set) as f:
            gt_layout_list += json.load(f)

    num_questions = len(training_questions)
    training_images = np.concatenate(training_images_list)

    # Shuffle the training data
    # (fix the random seed for data repeatability)
    np.random.seed(3)
    shuffle_inds = np.random.permutation(num_questions)

    def shuffle_array(x):
        return [x[i] for i in shuffle_inds]

    training_questions = shuffle_array(training_questions)
    training_labels = shuffle_array(training_labels)
    # training_images is a NumPy array; fancy indexing keeps it an array so
    # the mean subtraction below still broadcasts correctly
    training_images = training_images[shuffle_inds]
    gt_layout_list = shuffle_array(gt_layout_list)

    # number of training batches
    num_batches = int(np.ceil(num_questions / N))

    # Load vocabulary
    with open(vocab_shape_file) as f:
        vocab_shape_list = [s.strip() for s in f.readlines()]
    vocab_shape_dict = {word: n for n, word in enumerate(vocab_shape_list)}
    num_vocab_txt = len(vocab_shape_list)

    assembler = Assembler(vocab_layout_file)
    num_vocab_nmn = len(assembler.module_names)

    # Turn the questions into vocabulary indices
    text_seq_array = np.zeros((T_encoder, num_questions), np.int32)
    seq_length_array = np.zeros(num_questions, np.int32)
    gt_layout_array = np.zeros((T_decoder, num_questions), np.int32)
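    # text_seq_array and gt_layout_array are time-major, shape (T, num_questions);
    # positions past a question's length in text_seq_array remain zero padding.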

    for n_q in range(num_questions):
        tokens = training_questions[n_q].split()
        seq_length_array[n_q] = len(tokens)
        for t, token in enumerate(tokens):
            text_seq_array[t, n_q] = vocab_shape_dict[token]
        gt_layout_array[:, n_q] = assembler.module_list2tokens(
            gt_layout_list[n_q], T_decoder)
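
    # Worked example (hypothetical question string; actual SHAPES phrasing
    # may differ): "is a red shape left of a circle" splits into 7 tokens,
    # so seq_length_array[n_q] = 7, text_seq_array[0:7, n_q] holds the 7
    # vocabulary indices, and rows 7..T_encoder-1 stay zero.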

    # Subtract the precomputed dataset mean so the network sees centered inputs
    image_mean = np.load(image_mean_file)
    image_array = (training_images - image_mean).astype(np.float32)
    vqa_label_array = np.array(training_labels, np.int32)
    return (num_questions, training_images, num_batches, num_vocab_txt,
            assembler, num_vocab_nmn, text_seq_array, seq_length_array,
            gt_layout_array, image_array, vqa_label_array)
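

# Usage sketch (an assumption, not part of the original file): it relies on
# the globals N (batch size), T_encoder and T_decoder being defined, as they
# are in the surrounding experiment scripts.
(num_questions, training_images, num_batches, num_vocab_txt,
 assembler, num_vocab_nmn, text_seq_array, seq_length_array,
 gt_layout_array, image_array, vqa_label_array) = Pre(image_sets)
for i_batch in range(num_batches):
    sample = slice(i_batch * N, (i_batch + 1) * N)
    batch_text = text_seq_array[:, sample]      # time-major token indices
    batch_images = image_array[sample]          # mean-subtracted float32 images
    batch_labels = vqa_label_array[sample]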