Example #1
def run(input_path, output_path, pretrained_model=None):

    dataset = Dataset()

    dataset.load(input_path, generate_binary_sequences=True)
    dataset.save_metadata(output_path)
    dataset.voc.save(output_path)

    dataset.convert_arrays()

    input_shape = dataset.input_shape
    output_size = dataset.output_size

    print(len(dataset.input_images_tablet), len(dataset.input_images_desktop),
          len(dataset.partial_sequences), len(dataset.next_words))
    print(dataset.input_images_tablet.shape,
          dataset.input_images_desktop.shape, dataset.partial_sequences.shape,
          dataset.next_words.shape)

    model = pix2code(input_shape, output_size, output_path)

    if pretrained_model is not None:
        model.model.load_weights(pretrained_model)

    model.fit(dataset.input_images_tablet, dataset.input_images_desktop,
              dataset.partial_sequences, dataset.next_words)
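
For reference, convert_arrays is what makes the len()/.shape prints above valid. A sketch of the presumed behavior for this two-image variant (not the actual Dataset implementation):

import numpy as np

# Presumed effect of Dataset.convert_arrays: Python lists become numpy
# arrays, so both len() and .shape work on them afterwards
def convert_arrays(self):
    self.input_images_tablet = np.array(self.input_images_tablet)
    self.input_images_desktop = np.array(self.input_images_desktop)
    self.partial_sequences = np.array(self.partial_sequences)
    self.next_words = np.array(self.next_words)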
Example #2
def run(input_path,
        output_path,
        is_memory_intensive=False,
        pretrained_model=None):
    np.random.seed(1234)

    dataset = Dataset()
    dataset.load(input_path, generate_binary_sequences=True)
    dataset.save_metadata(output_path)
    dataset.voc.save(output_path)

    if not is_memory_intensive:
        dataset.convert_arrays()

        input_shape = dataset.input_shape
        output_size = dataset.output_size

        print(len(dataset.input_images), len(dataset.partial_sequences),
              len(dataset.next_words))
        print(dataset.input_images.shape, dataset.partial_sequences.shape,
              dataset.next_words.shape)
    else:
        gui_paths, img_paths = Dataset.load_paths_only(input_path)

        input_shape = dataset.input_shape
        output_size = dataset.output_size
        steps_per_epoch = dataset.size // BATCH_SIZE  # integer division: Keras expects whole steps

        voc = Vocabulary()
        voc.retrieve(output_path)

        generator = Generator.data_generator(voc,
                                             gui_paths,
                                             img_paths,
                                             batch_size=BATCH_SIZE,
                                             generate_binary_sequences=True)

    model = pix2code(input_shape, output_size, output_path)

    if pretrained_model is not None:
        model.model.load_weights(pretrained_model)

    if not is_memory_intensive:
        model.fit(dataset.input_images, dataset.partial_sequences,
                  dataset.next_words)
    else:
        model.fit_generator(generator, steps_per_epoch=steps_per_epoch)
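
The is_memory_intensive branch streams batches instead of materializing every array in memory. A toy sketch of the batch contract that fit_generator relies on (the real Generator.data_generator differs; this only illustrates the assumed shape of each yield):

import numpy as np

def toy_data_generator(images, sequences, next_words, batch_size):
    # Keras-style generators loop forever; each yield is one batch of
    # ([image_batch, partial_sequence_batch], next_word_batch)
    i = 0
    while True:
        j = i + batch_size
        yield ([np.asarray(images[i:j]), np.asarray(sequences[i:j])],
               np.asarray(next_words[i:j]))
        i = 0 if j >= len(images) else j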
Example #3
def run(input_path, trained_model):

    dataset = Dataset()

    dataset.load(input_path, generate_binary_sequences=True)
    dataset.convert_arrays()

    input_shape = dataset.input_shape
    output_size = dataset.output_size

    print(len(dataset.input_images_tablet), len(dataset.input_images_desktop),
          len(dataset.partial_sequences), len(dataset.next_words))
    print(dataset.input_images_tablet.shape,
          dataset.input_images_desktop.shape, dataset.partial_sequences.shape,
          dataset.next_words.shape)

    model = pix2code(input_shape, output_size, "")

    if trained_model is not None:
        model.model.load_weights(trained_model)

    evaluation = model.evaluate(dataset.input_images_tablet,
                                dataset.input_images_desktop,
                                dataset.partial_sequences, dataset.next_words)

    correct_samples = 0
    for i in range(0, len(dataset.input_images_tablet)):
        print("Predicting {}".format(i))
        probas = model.predict(np.array([dataset.input_images_tablet[i]]),
                               np.array([dataset.input_images_desktop[i]]),
                               np.array([dataset.partial_sequences[i]]))

        prediction = np.argmax(probas)

        sparse_label = np.zeros(output_size)
        sparse_label[prediction] = 1

        if np.array_equal(sparse_label, dataset.next_words[i]):
            correct_samples += 1

    print("loss: {}".format(evaluation))
    accuracy = correct_samples / len(dataset.partial_sequences)
    print("accuracy: {}".format(accuracy))
Example #4
if len(argv) < 5:
    # argument check (usage string inferred from the assignments in the else branch)
    print("Error: not enough argument supplied:")
    print("generate.py <trained weights path> <trained model name> "
          "<input path> <output path> <encoding type> "
          "<search method (default: greedy)>")
    exit(0)
else:
    trained_weights_path = argv[0]
    trained_model_name = argv[1]
    input_path = argv[2]
    output_path = argv[3]
    encoding_type = argv[4]
    search_method = "greedy" if len(argv) < 6 else argv[5]

meta_dataset = np.load("{}/meta_dataset.npy".format(trained_weights_path),
                       allow_pickle=True)
input_shape = meta_dataset[0]
output_size = meta_dataset[1]

model = pix2code(input_shape, output_size, trained_weights_path, encoding_type)
model.load(trained_model_name)

sampler = Sampler(trained_weights_path, input_shape, output_size,
                  CONTEXT_LENGTH)

# dataset = Dataset()
# if encoding_type == "one_hot":
#     dataset.load_with_one_hot_encoding(input_path, generate_binary_sequences=True)
# elif encoding_type == "w2v":
#     dataset.load_with_word2vec(input_path, generate_binary_sequences=True)
# else:
#     raise Exception("Missing parameter")

# dataset = Dataset()
# dataset.load(input_path)
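
The snippet ends after building the sampler; by analogy with Example #5 below, a sampling step presumably follows. A hedged sketch of that continuation (the names mirror Example #5 and are not confirmed by this snippet):

# presumed continuation, not part of this snippet
evaluation_img = Utils.get_preprocessed_img(input_path, IMAGE_SIZE)

if search_method == "greedy":
    result, _ = sampler.predict_greedy(model, np.array([evaluation_img]))
    print("Result greedy: {}".format(result))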
Example #5
if len(argv) < 5:
    # argument parsing (names inferred from their use later in the script)
    print("Error: not enough argument supplied:")
    print("sample.py <trained weights path> <trained model name> "
          "<input image tablet> <input image desktop> <output path> "
          "<search method (default: greedy)>")
    exit(0)
else:
    trained_weights_path = argv[0]
    trained_model_name = argv[1]
    input_path_tablet = argv[2]
    input_path_desktop = argv[3]
    output_path = argv[4]
    search_method = "greedy" if len(argv) < 6 else argv[5]

np_load_old = np.load

# temporarily patch np.load so the metadata loads with allow_pickle=True
np.load = lambda *a, **k: np_load_old(*a, allow_pickle=True, **k)

meta_dataset = np.load("{}/meta_dataset.npy".format(trained_weights_path))

# restore np.load for future normal usage
np.load = np_load_old
input_shape = meta_dataset[0]
output_size = meta_dataset[1]

model = pix2code(input_shape, output_size, trained_weights_path)
model.load(trained_model_name)

sampler = Sampler(trained_weights_path, input_shape, output_size, CONTEXT_LENGTH)

file_name = basename(input_path_tablet)[:basename(input_path_tablet).find(".")]

#adjusted in order to deal with two input images
evaluation_img_tablet = Utils.get_preprocessed_img(input_path_tablet, IMAGE_SIZE)
evaluation_img_desktop = Utils.get_preprocessed_img(input_path_desktop, IMAGE_SIZE)

if search_method == "greedy":
    result, _ = sampler.predict_greedy(model, np.array([evaluation_img_tablet]), np.array([evaluation_img_desktop]))
    print("Result greedy: {}".format(result))
else:
    beam_width = int(search_method)
    print("Search with beam width: {}".format(beam_width))
    # assumes this fork's predict_beam_search takes both images, like predict_greedy
    result, _ = sampler.predict_beam_search(model, np.array([evaluation_img_tablet]),
                                            np.array([evaluation_img_desktop]),
                                            beam_width=beam_width)
    print("Result beam: {}".format(result))
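
In the upstream pix2code sampling script the predicted token stream is finally written out as a .gui file; a sketch of that last step (START_TOKEN and END_TOKEN are pix2code config constants assumed to be in scope):

# strip the start/end tokens and save the sampled DSL
with open("{}/{}.gui".format(output_path, file_name), 'w') as out_file:
    out_file.write(result.replace(START_TOKEN, "").replace(END_TOKEN, ""))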
Example #6
def run(input_path,
        output_path,
        is_memory_intensive=False,
        pretrained_model=None,
        use_validation_data=False):
    np.random.seed(1234)

    dataset = Dataset()
    # generate_binary_sequences=True means the partial_sequences are one-hot encoded
    dataset.load(input_path, generate_binary_sequences=True)
    dataset.save_metadata(output_path)
    dataset.voc.save(output_path)

    if not is_memory_intensive:
        dataset.convert_arrays()

        input_shape = dataset.input_shape
        output_size = dataset.output_size

        print(len(dataset.input_images), len(dataset.partial_sequences),
              len(dataset.next_words))
        print(dataset.input_images.shape, dataset.partial_sequences.shape,
              dataset.next_words.shape)
    else:
        gui_paths, img_paths = Dataset.load_paths_only(input_path)

        input_shape = dataset.input_shape
        output_size = dataset.output_size
        steps_per_epoch = dataset.size // BATCH_SIZE  # integer division: Keras expects whole steps

        voc = Vocabulary()
        voc.retrieve(output_path)

        generator = Generator.data_generator(voc,
                                             gui_paths,
                                             img_paths,
                                             batch_size=BATCH_SIZE,
                                             generate_binary_sequences=True)
        val_generator = None
        validation_steps = None

        # initialize a generator for the validation_data
        if use_validation_data:
            if input_path[-1] == "/":
                val_data_path = os.path.dirname(os.path.dirname(input_path))
            else:
                val_data_path = os.path.dirname(input_path)
            if os.path.exists(val_data_path + "/eval_feature"):
                val_data_path += "/eval_feature"
            else:
                val_data_path += "/eval_set"
            assert os.path.exists(val_data_path)
            # compute validation_steps
            val_dataset = Dataset()
            # generate_binary_sequences=True means the partial_sequences are one-hot encoded
            val_dataset.load(val_data_path, generate_binary_sequences=True)
            validation_steps = max(1, val_dataset.size // BATCH_SIZE)

            val_gui_paths, val_img_paths = Dataset.load_paths_only(
                val_data_path)
            val_generator = Generator.data_generator(
                voc,
                val_gui_paths,
                val_img_paths,
                batch_size=BATCH_SIZE,
                generate_binary_sequences=True,
                mode="eval")

    model = pix2code(input_shape, output_size, output_path)

    if pretrained_model is not None:
        model.model.load_weights(pretrained_model)

    if not is_memory_intensive:
        history = model.fit(dataset.input_images, dataset.partial_sequences,
                            dataset.next_words)
    else:
        checkpoint = model.make_checkpoint()
        # tbcallback = model.tensorboard_callback()
        callbacks_list = [checkpoint]
        history = model.fit_generator(generator,
                                      steps_per_epoch=steps_per_epoch,
                                      val_data=val_generator,
                                      val_steps=validation_steps,
                                      callbacks_list=callbacks_list)

    model.train_visualization(history)
    model.save_model_to_json()
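
train_visualization is not defined in this excerpt; a minimal sketch of what such a helper could plot from the Keras History object (hypothetical, assuming matplotlib is available):

import matplotlib.pyplot as plt

def plot_history(history, out_path):
    # hypothetical stand-in for model.train_visualization
    plt.plot(history.history["loss"], label="train loss")
    if "val_loss" in history.history:
        plt.plot(history.history["val_loss"], label="val loss")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.savefig(out_path + "/loss_curve.png")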
Example #7
def run(input_path,
        output_path,
        is_memory_intensive=False,
        pretrained_model=None):
    np.random.seed(1234)

    dataset = Dataset()
    dataset.load(input_path, generate_binary_sequences=True)
    dataset.save_metadata(output_path)
    dataset.voc.save(output_path)

    if not is_memory_intensive:
        dataset.convert_arrays()

        input_shape = dataset.input_shape
        output_size = dataset.output_size

        print(len(dataset.input_images), len(dataset.partial_sequences),
              len(dataset.next_words))
        print(dataset.input_images.shape, dataset.partial_sequences.shape,
              dataset.next_words.shape)
    else:
        gui_paths, img_paths = Dataset.load_paths_only(input_path)

        input_shape = dataset.input_shape
        output_size = dataset.output_size
        steps_per_epoch = dataset.size // BATCH_SIZE  # integer division: Keras expects whole steps

        voc = Vocabulary()
        voc.retrieve(output_path)

        generator = Generator.data_generator(voc,
                                             gui_paths,
                                             img_paths,
                                             batch_size=BATCH_SIZE,
                                             generate_binary_sequences=True)

    model = pix2code(input_shape, output_size, output_path)

    print(output_path + "model_summary")
    model_summary = open(output_path + "model_summary.txt", "w+")
    model.model.summary(print_fn=lambda x: model_summary.write(x + '\n'))
    model_summary.write("\n")
    model_summary.write("CONTEXT_LENGTH " + str(CONTEXT_LENGTH) + '\n')
    model_summary.write("IMAGE_SIZE " + str(IMAGE_SIZE) + '\n')
    model_summary.write("BATCH_SIZE " + str(BATCH_SIZE) + '\n')
    model_summary.write("EPOCHS " + str(EPOCHS) + '\n')
    model_summary.write("STEPS_PER_EPOCH " + str(STEPS_PER_EPOCH) + '\n')
    model_summary.write("input_shape " + str(input_shape) + '\n')
    model_summary.write("output_size " + str(output_size) + '\n')
    model_summary.write("input_images " + str(len(dataset.input_images)) +
                        '\n')
    model_summary.write("partial_sequences " +
                        str(len(dataset.partial_sequences)) + '\n')
    model_summary.write("next_words " + str(len(dataset.next_words)) + '\n')

    if pretrained_model is not None:
        model.model.load_weights(pretrained_model)

    if not is_memory_intensive:
        model.fit(dataset.input_images, dataset.partial_sequences,
                  dataset.next_words, output_path)
    else:
        model.fit_generator(generator,
                            output_path,
                            steps_per_epoch=steps_per_epoch)
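
For completeness, a command-line driver that selects between the in-memory and generator paths could look like this (argument layout is an assumption in the style of pix2code's train.py, not part of the example):

import sys

if __name__ == "__main__":
    argv = sys.argv[1:]
    if len(argv) < 2:
        print("Usage: train.py <input path> <output path> "
              "[<use generator: 1|0>] [<pretrained weights>]")
        exit(0)
    run(argv[0], argv[1],
        is_memory_intensive=len(argv) >= 3 and int(argv[2]) == 1,
        pretrained_model=argv[3] if len(argv) >= 4 else None)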