Example 1
import itertools
import random

import numpy as np

# Assumption: this snippet comes from keras-ocr, whose tools module provides
# the read/augment/fit/adjust_boxes helpers used below.
from keras_ocr import tools


def get_detector_image_generator(labels,
                                 width,
                                 height,
                                 augmenter=None,
                                 area_threshold=0.5):
    """Generated augmented (image, lines) tuples from a list
    of (filepath, lines, confidence) tuples. Confidence is
    not used right now but is included for a future release
    that uses semi-supervised data.

    Args:
        labels: A list of (image, lines, confience) tuples.
        augmenter: An augmenter to apply to the images.
        width: The width to use for output images
        height: The height to use for output images
        area_threshold: The area threshold to use to keep
            characters in augmented images.
    """
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        image_filepath, lines, _ = labels[index]  # confidence is unused here
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         augmenter=augmenter)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines,
                                   boxes_format='lines',
                                   scale=scale)

        bboxes = [line[0] for line in lines]
        words = [line[1] for line in lines]
        words = ''.join(words)

        yield (image[np.newaxis, ...],
               np.array(bboxes)[np.newaxis, ...],
               np.array(words)[np.newaxis, ...],
               np.ones((image.shape[0], image.shape[1]),
                       np.float32)[np.newaxis, ...],
               np.ones(len(words), np.float32)[np.newaxis, ...])
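
A minimal usage sketch (not part of the source): `sample_labels` is hypothetical and follows the (filepath, lines, confidence) layout the docstring describes, with each line entry a (box, text) pair as implied by the `line[0]`/`line[1]` indexing above.

box = np.array([[0, 0], [100, 0], [100, 32], [0, 32]], dtype='float32')
sample_labels = [('/path/to/image.jpg', [(box, 'hello')], 1.0)]

generator = get_detector_image_generator(sample_labels, width=640, height=640)
images, bboxes, words, mask, weights = next(generator)
# images: (1, 640, 640, 3); mask: (1, 640, 640); one weight per character.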
Example 2
import json
import math
import os
import random
import time

import numpy as np
import tensorflow as tf

# `a` (the parsed argparse namespace), CROP_SIZE, and the helpers
# load_examples, create_model, create_generator, convert, save_images,
# append_index and tools are assumed to be defined elsewhere in this
# pix2pix-style script.


def main():
    if tf.__version__.split('.')[0] != "1":
        raise Exception("Tensorflow version 1 required")

    if a.seed is None:
        a.seed = random.randint(0, 2**31 - 1)

    tf.set_random_seed(a.seed)
    np.random.seed(a.seed)
    random.seed(a.seed)

    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)
    if a.mode == "test" or a.mode == "export":
        if a.checkpoint is None:
            raise Exception("checkpoint required for test mode")

        # load some options from the checkpoint
        options = {"which_direction", "ngf", "ndf", "lab_colorization"}
        with open(os.path.join(a.checkpoint, "options.json")) as f:
            for key, val in json.loads(f.read()).items():
                if key in options:
                    print("loaded", key, "=", val)
                    setattr(a, key, val)
        # disable these features in test mode
        a.scale_size = CROP_SIZE
        a.flip = False
    for k, v in a._get_kwargs():
        print(k, "=", v)

    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))
    if a.mode == "export":
        # export the generator to a meta graph that can be imported later for standalone generation
        if a.lab_colorization:
            raise Exception("export not supported for lab_colorization")

        input = tf.placeholder(tf.string, shape=[1])
        input_data = tf.decode_base64(input[0])
        input_image = tf.image.decode_png(input_data)

        # remove the alpha channel if present: when the image has 4 channels,
        # tf.cond takes the first branch (drop alpha), otherwise the second
        input_image = tf.cond(tf.equal(tf.shape(input_image)[2],
                                       4), lambda: input_image[:, :, :3],
                              lambda: input_image)
        # convert grayscale to RGB
        input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 1),
                              lambda: tf.image.grayscale_to_rgb(input_image),
                              lambda: input_image)

        input_image = tf.image.convert_image_dtype(input_image,
                                                   dtype=tf.float32)
        input_image.set_shape([CROP_SIZE, CROP_SIZE, 3])
        batch_input = tf.expand_dims(input_image, axis=0)

        with tf.variable_scope("generator"):
            batch_output = tools.deprocess(
                create_generator(tools.preprocess(batch_input), 3))

        output_image = tf.image.convert_image_dtype(batch_output,
                                                    dtype=tf.uint8)[0]
        if a.output_filetype == "png":
            output_data = tf.image.encode_png(output_image)
        elif a.output_filetype == "jpeg":
            output_data = tf.image.encode_jpeg(output_image, quality=80)
        else:
            raise Exception("invalid filetype")
        output = tf.convert_to_tensor([tf.encode_base64(output_data)])

        key = tf.placeholder(tf.string, shape=[1])
        inputs = {"key": key.name, "input": input.name}
        tf.add_to_collection("inputs", json.dumps(inputs))
        outputs = {
            "key": tf.identity(key).name,
            "output": output.name,
        }
        tf.add_to_collection("outputs", json.dumps(outputs))

        init_op = tf.global_variables_initializer()
        restore_saver = tf.train.Saver()
        export_saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            restore_saver.restore(sess, checkpoint)
            print("exporting model")
            export_saver.export_meta_graph(
                filename=os.path.join(a.output_dir, "export.meta"))
            export_saver.save(sess,
                              os.path.join(a.output_dir, "export"),
                              write_meta_graph=False)

        return


    examples = load_examples(a)
    print("examples count = %d" % examples.count)

    # inputs and targets are [batch_size, height, width, channels]
    model = create_model(examples.inputs, examples.targets, a)

    # undo colorization splitting on images that we use for display/output
    if a.lab_colorization:
        if a.which_direction == "AtoB":
            # inputs hold only the brightness channel and can be handled as a
            # grayscale image; targets and outputs need the brightness added back
            targets = tools.augment(examples.targets, examples.inputs)
            outputs = tools.augment(model.outputs, examples.inputs)
            # inputs can be deprocessed normally and handled as if they are single channel
            # grayscale images
            inputs = tools.deprocess(examples.inputs)
        elif a.which_direction == "BtoA":
            # inputs will be color channels only, get brightness from targets
            inputs = tools.augment(examples.inputs, examples.targets)
            targets = tools.deprocess(examples.targets)
            outputs = tools.deprocess(model.outputs)
        else:
            raise Exception("invalid direction")
    else:
        inputs = tools.deprocess(examples.inputs)
        targets = tools.deprocess(examples.targets)
        outputs = tools.deprocess(model.outputs)

    # reverse any processing on images so they can be written to disk or displayed to user
    with tf.name_scope("convert_inputs"):
        converted_inputs = convert(inputs)

    with tf.name_scope("convert_targets"):
        converted_targets = convert(targets)

    with tf.name_scope("convert_outputs"):
        converted_outputs = convert(outputs)

    with tf.name_scope("encode_images"):
        display_fetches = {
            "paths":
            examples.paths,
            "inputs":
            tf.map_fn(tf.image.encode_png,
                      converted_inputs,
                      dtype=tf.string,
                      name="input_pngs"),
            "targets":
            tf.map_fn(tf.image.encode_png,
                      converted_targets,
                      dtype=tf.string,
                      name="target_pngs"),
            "outputs":
            tf.map_fn(tf.image.encode_png,
                      converted_outputs,
                      dtype=tf.string,
                      name="output_pngs"),
        }

    # summaries
    with tf.name_scope("inputs_summary"):
        tf.summary.image("inputs", converted_inputs)

    with tf.name_scope("targets_summary"):
        tf.summary.image("targets", converted_targets)

    with tf.name_scope("outputs_summary"):
        tf.summary.image("outputs", converted_outputs)

    with tf.name_scope("predict_real_summary"):
        tf.summary.image(
            "predict_real",
            tf.image.convert_image_dtype(model.predict_real, dtype=tf.uint8))

    with tf.name_scope("predict_fake_summary"):
        tf.summary.image(
            "predict_fake",
            tf.image.convert_image_dtype(model.predict_fake, dtype=tf.uint8))

    tf.summary.scalar("discriminator_loss", model.discrim_loss)
    tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN)
    tf.summary.scalar("generator_loss_L1", model.gen_loss_L1)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    saver = tf.train.Saver(max_to_keep=1)

    logdir = a.output_dir if (a.trace_freq > 0 or a.summary_freq > 0) else None
    sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None)
    with sv.managed_session() as sess:
        print("parameter_count =", sess.run(parameter_count))

        if a.checkpoint is not None:
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            saver.restore(sess, checkpoint)

        max_steps = 2**32
        if a.max_epochs is not None:
            max_steps = examples.steps_per_epoch * a.max_epochs
        if a.max_steps is not None:
            max_steps = a.max_steps

        if a.mode == "test":
            # testing
            # at most, process the test data once
            max_steps = min(examples.steps_per_epoch, max_steps)
            for step in range(max_steps):
                results = sess.run(display_fetches)
                filesets = save_images(results)
                for f in filesets:
                    print("evaluated image", f["name"])
                index_path = append_index(filesets)

            print("wrote index at", index_path)
        else:
            # training
            start = time.time()

            for step in range(max_steps):

                def should(freq):
                    return freq > 0 and ((step + 1) % freq == 0
                                         or step == max_steps - 1)

                options = None
                run_metadata = None
                if should(a.trace_freq):
                    options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                fetches = {
                    "train": model.train,
                    "global_step": sv.global_step,
                }

                if should(a.progress_freq):
                    fetches["discrim_loss"] = model.discrim_loss
                    fetches["gen_loss_GAN"] = model.gen_loss_GAN
                    fetches["gen_loss_L1"] = model.gen_loss_L1

                if should(a.summary_freq):
                    fetches["summary"] = sv.summary_op

                if should(a.display_freq):
                    fetches["display"] = display_fetches

                results = sess.run(fetches,
                                   options=options,
                                   run_metadata=run_metadata)

                if should(a.summary_freq):
                    print("recording summary")
                    sv.summary_writer.add_summary(results["summary"],
                                                  results["global_step"])

                if should(a.display_freq):
                    print("saving display images")
                    filesets = save_images(results["display"],
                                           step=results["global_step"])
                    append_index(filesets, step=True)

                if should(a.trace_freq):
                    print("recording trace")
                    sv.summary_writer.add_run_metadata(
                        run_metadata, "step_%d" % results["global_step"])

                if should(a.progress_freq):
                    # global_step will have the correct step count if we resume from a checkpoint
                    train_epoch = math.ceil(results["global_step"] /
                                            examples.steps_per_epoch)
                    train_step = (results["global_step"] -
                                  1) % examples.steps_per_epoch + 1
                    rate = (step + 1) * a.batch_size / (time.time() - start)
                    remaining = (max_steps - step) * a.batch_size / rate
                    print(
                        "progress  epoch %d  step %d  image/sec %0.1f  remaining %dm"
                        % (train_epoch, train_step, rate, remaining / 60))
                    print("discrim_loss", results["discrim_loss"])
                    print("gen_loss_GAN", results["gen_loss_GAN"])
                    print("gen_loss_L1", results["gen_loss_L1"])

                if should(a.save_freq):
                    print("saving model")
                    saver.save(sess,
                               os.path.join(a.output_dir, "model"),
                               global_step=sv.global_step)

                if sv.should_stop():
                    break
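
A sketch of how the exported graph might be consumed for standalone generation, based on the "inputs"/"outputs" collections stored above; the file paths are placeholders, and the image bytes must be web-safe base64 because tf.decode_base64 expects that alphabet.

import base64
import json

import tensorflow as tf

with tf.Session() as sess:
    saver = tf.train.import_meta_graph('output_dir/export.meta')
    saver.restore(sess, 'output_dir/export')
    inputs = json.loads(tf.get_collection('inputs')[0])
    outputs = json.loads(tf.get_collection('outputs')[0])

    with open('input.png', 'rb') as f:
        encoded = base64.urlsafe_b64encode(f.read())

    result = sess.run(outputs['output'], feed_dict={inputs['input']: [encoded]})
    data = result[0]
    # tf.encode_base64 omits padding, so restore it before decoding.
    data += b'=' * (-len(data) % 4)
    with open('generated.png', 'wb') as f:
        f.write(base64.urlsafe_b64decode(data))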
Example 3
    visualize(coposition, category_id_to_name)


# `prepareDataset`, `dataAugmentFormat`, `augment`, `showPrediction` and
# `visualize` are assumed to be project-local helpers; Resize is assumed to
# come from albumentations.
from albumentations import Resize
from keras.models import load_model

# PREPARE THE DATASET FOR TRAINING
imageSize = [1920, 1080, 3]
trainingImageSize = [640, 360, 3]
path = "C:/Users/Nucelles 3.0/Documents/BICS/BSP S2/Project/DATA/testing"

category_id_to_name = {1: "License Plate"}

x_test, y_test = prepareDataset(path, 50, trainingImageSize, 500)

i = 1
predFormatted = dataAugmentFormat(x_test[i], y_test[i]*255)
resizedComposedImage = augment([Resize(p=1, height=144, width=256)])
finalComposedImage = resizedComposedImage(**predFormatted)
showPrediction(finalComposedImage)

model = load_model("model_30-03-2020_01-02-36_PM.h5")

results = model.evaluate(x_test, y_test, batch_size=128)
print('test loss, test acc:', results)




toShow = 3
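
A hypothetical continuation (a guess at what `toShow` was for) that previews a few predictions using the same formatting helpers the fragment already calls:

resize = augment([Resize(p=1, height=144, width=256)])
predictions = model.predict(x_test[:toShow])
for image, prediction in zip(x_test[:toShow], predictions):
    formatted = dataAugmentFormat(image, prediction * 255)
    showPrediction(resize(**formatted))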
Example 4
import itertools
import random

import imgaug
import numpy as np

# As in Example 1, `tools` is assumed to be keras-ocr's tools module.
from keras_ocr import tools


def get_detector_image_generator(labels,
                                 width,
                                 height,
                                 augmenter=None,
                                 area_threshold=0.5,
                                 focused=False,
                                 min_area=None):
    """Generated augmented (image, lines) tuples from a list
    of (filepath, lines, confidence) tuples. Confidence is
    not used right now but is included for a future release
    that uses semi-supervised data.

    Args:
        labels: A list of (image, lines, confience) tuples.
        augmenter: An augmenter to apply to the images.
        width: The width to use for output images
        height: The height to use for output images
        area_threshold: The area threshold to use to keep
            characters in augmented images.
        min_area: The minimum area for a character to be
            included.
        focused: Whether to pre-crop images to width/height containing
            a region containing text.
    """
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        image_filepath, lines, confidence = labels[index]
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         min_area=min_area,
                                         augmenter=augmenter)
        if focused:
            boxes = [tools.combine_line(line)[0] for line in lines]
            if boxes:
                selected = np.array(boxes[np.random.choice(len(boxes))])
                left, top = selected.min(axis=0).clip(0, np.inf).astype('int')
                if left > 0:
                    left -= np.random.randint(0, min(left, width / 2))
                if top > 0:
                    top -= np.random.randint(0, min(top, height / 2))
                image, lines = tools.augment(
                    boxes=lines,
                    augmenter=imgaug.augmenters.Sequential([
                        imgaug.augmenters.Crop(px=(int(top), 0, 0, int(left))),
                        imgaug.augmenters.CropToFixedSize(width=width,
                                                          height=height,
                                                          position='right-bottom')
                    ]),
                    boxes_format='lines',
                    image=image,
                    min_area=min_area,
                    area_threshold=area_threshold)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines, boxes_format='lines', scale=scale)
        yield image, lines, confidence
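
To see the focused pre-crop in isolation, here is a self-contained sketch of the same imgaug pipeline on a dummy image; the offsets are arbitrary placeholders and only the output shape is checked:

import imgaug
import numpy as np

image = np.zeros((1080, 1920, 3), dtype='uint8')
top, left = 200, 300
width, height = 640, 360
pipeline = imgaug.augmenters.Sequential([
    # px order is (top, right, bottom, left): trim rows above and columns
    # to the left of the selected text region.
    imgaug.augmenters.Crop(px=(top, 0, 0, left)),
    # Same fixed-size anchor as the generator above.
    imgaug.augmenters.CropToFixedSize(width=width, height=height,
                                      position='right-bottom'),
])
cropped = pipeline.augment_image(image)
print(cropped.shape)  # (360, 640, 3)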
Example 5
import itertools

import cv2
import numpy as np

# `tools`, `Detector` and `inference_character_box` are assumed to come
# from the surrounding project (keras-ocr-style tools plus a pretrained
# character detector).


def real_data_generator(labels,
                        width,
                        height,
                        augmenter=None,
                        area_threshold=0.5):
    labels = labels.copy()

    # Build the character detector once instead of once per image.
    detector = Detector()

    for index in itertools.cycle(range(len(labels))):
        image_filepath, lines = labels[index]
        image = tools.read(image_filepath)

        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         augmenter=augmenter)

        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)

        lines = tools.adjust_boxes(boxes=lines, boxes_format='lines', scale=scale)
        confidence_mask = np.zeros((image.shape[0], image.shape[1]), np.float32)

        confidences = []
        lines_label = []

        if len(lines) == 1:
            lines = lines[0]
        for line in lines:
            word_label = []
            word_bbox, word = line[0], line[1]
            word = word.replace(',', '')
            word_bbox = np.float32(word_bbox)
            # Unlabelled ('###') or empty words contribute zero confidence;
            # a single fill is equivalent to the original per-point loop.
            if len(word_bbox) > 0 and (word == '###' or len(word.strip()) == 0):
                cv2.fillPoly(confidence_mask, [np.int32(word_bbox)], 0)
            pseudo_bboxes, bbox_region_scores, confidence = inference_character_box(
                detector, image, word, word_bbox)
            confidences.append(confidence)
            cv2.fillPoly(confidence_mask, [np.int32(word_bbox)], confidence)
            for j in range(len(pseudo_bboxes)):
                if j > len(word) - 1:
                    continue
                word_label.append((pseudo_bboxes[j], word[j]))
            lines_label.append(word_label)

        yield image, lines_label, 1
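
A minimal consumption sketch, assuming a hypothetical `labels` list in the (filepath, lines) layout the generator indexes:

box = np.float32([[0, 0], [100, 0], [100, 32], [0, 32]])
labels = [('/path/to/image.jpg', [(box, 'hello')])]

generator = real_data_generator(labels, width=640, height=640)
image, lines_label, confidence = next(generator)
# lines_label holds one list of (pseudo_box, character) pairs per word;
# the confidence yielded above is the constant 1.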