Example #1
    def use_release(self, gpus=1):
        """Use the latest DeepForest model release from github and load model.
        Optionally download if release doesn't exist.

        Args:
            gpus: number of gpus to parallelize, defaults to 1

        Returns:
            model (object): A trained keras model
        """
        # Download latest model from github release
        release_tag, self.weights = utilities.use_release()

        # load saved model and tag release
        self.__release_version__ = release_tag
        print("Loading pre-built model: {}".format(release_tag))

        if gpus == 1:
            with warnings.catch_warnings():
                # Suppress compile warning, not relevant here
                warnings.filterwarnings("ignore", category=UserWarning)
                self.model = utilities.read_model(self.weights, self.config)

            # Convert model
            self.prediction_model = convert_model(self.model)
        elif gpus > 1:
            backbone = models.backbone(self.config["backbone"])
            n_classes = len(self.labels.keys())
            self.model, self.training_model, self.prediction_model = create_models(
                backbone.retinanet,
                num_classes=n_classes,
                weights=self.weights,
                multi_gpu=gpus)

        # add to config
        self.config["weights"] = self.weights
Example #2
def test_lengths(config):
    """Assert that a csv generator and tfrecords create
    the same number of images in an epoch"""

    created_records = tfrecords.create_tfrecords(
        annotations_file="tests/output/testfile_tfrecords.csv",
        class_file="tests/output/classes.csv",
        image_min_side=config["image-min-side"],
        backbone_model=config["backbone"],
        size=100,
        savedir="tests/output/")

    # tfdata
    tf_filenames = find_tf_filenames(path="tests/output/*.tfrecord")

    # keras generator
    backbone = models.backbone(config["backbone"])
    generator = csv_generator.CSVGenerator(
        csv_data_file="tests/output/testfile_tfrecords.csv",
        csv_class_file="tests/output/classes.csv",
        image_min_side=config["image-min-side"],
        preprocess_image=backbone.preprocess_image,
    )

    fit_generator_length = generator.size()
    assert len(tf_filenames) == fit_generator_length
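
The config fixture consumed above is not shown. A hedged sketch of one
that would satisfy this test, reusing utilities.read_config from Example #3;
the key values are assumptions.

import pytest
from deepforest import utilities

@pytest.fixture()
def config():
    # Hypothetical fixture: the real suite may build this differently
    config = utilities.read_config("deepforest_config.yml")
    config["backbone"] = "resnet50"
    config["image-min-side"] = 800
    return config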
Example #3
    def __init__(self, weights=None, saved_model=None):
        self.weights = weights
        self.saved_model = saved_model

        # Read config file - if a config file exists in local dir use it,
        # if not use installed.
        if os.path.exists("deepforest_config.yml"):
            config_path = "deepforest_config.yml"
        else:
            try:
                config_path = get_data("deepforest_config.yml")
            except Exception as e:
                raise ValueError(
                    "No deepforest_config.yml found either in local "
                    "directory or in installed package location. {}".format(e))

        print("Reading config file: {}".format(config_path))
        self.config = utilities.read_config(config_path)

        # Create a label dict, defaults to "Tree"
        self.read_classes()

        # release version id to flag if release is being used
        self.__release_version__ = None

        # Load saved model if needed
        if self.saved_model:
            print("Loading saved model")
            # Capture user warning, not relevant here
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=UserWarning)
                self.model = models.load_model(saved_model)
                self.prediction_model = convert_model(self.model)

        elif self.weights:
            print("Creating model from weights")
            backbone = models.backbone(self.config["backbone"])
            self.model, self.training_model, self.prediction_model = create_models(
                backbone.retinanet, num_classes=1, weights=self.weights)
        else:
            print(
                "A blank deepforest object created. "
                "To perform prediction, either train or load an existing model."
            )
            self.model = None
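
A short sketch of the three construction paths this constructor supports;
the file names are placeholders.

from deepforest import deepforest

# Blank object: config is read, but no model is loaded
blank = deepforest.deepforest()

# Rebuild the retinanet from a weights file (placeholder path)
from_weights = deepforest.deepforest(weights="deepforest_weights.h5")

# Load a full saved keras model and convert it for prediction (placeholder path)
from_saved = deepforest.deepforest(saved_model="deepforest_model.h5")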
Example #4
def test_equivalence(config, setup_create_tensors):
    # unpack created tensors
    tf_inputs, tf_targets = setup_create_tensors

    # the image going into tensorflow should be equivalent
    # to the image from the fit_generator
    backbone = models.backbone(config["backbone"])

    # CSV generator
    generator = csv_generator.CSVGenerator(
        csv_data_file="tests/output/testfile_tfrecords.csv",
        csv_class_file="tests/data/classes.csv",
        image_min_side=config["image-min-side"],
        preprocess_image=backbone.preprocess_image,
    )

    # find the file in the randomized generator group
    first_file = generator.groups[0][0]
    gen_filename = os.path.join(generator.base_dir,
                                generator.image_names[first_file])
    original_image = generator.load_image(first_file)
    inputs, targets = generator.__getitem__(0)

    image = inputs[0, ...]
    targets = targets[0][0, ...]

    with tf.Session() as sess:
        # seek the randomized image to match
        tf_inputs, tf_targets = sess.run([tf_inputs, tf_targets])

    # assert filename is the same as generator
    # assert gen_filename == filename
    # tf_image = tf_image[0,...]
    tf_inputs = tf_inputs[0, ...]
    tf_targets = tf_targets[0][0, ...]

    # Same shape
    # assert tf_image.shape == image.shape
    assert tf_inputs.shape == image.shape
    assert tf_targets.shape == targets.shape
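
The setup_create_tensors fixture is defined elsewhere. A hedged sketch of
one shape it could take, calling tfrecords.create_dataset the same way
Example #6 does; glob stands in for the unshown find_tf_filenames helper.

import glob
import pytest
from deepforest import tfrecords

@pytest.fixture()
def setup_create_tensors(config):
    # Hypothetical fixture: iterate over the shards written by
    # create_tfrecords and split the next element into inputs and targets
    list_of_tfrecords = glob.glob("tests/output/*.tfrecord")
    iterator = tfrecords.create_dataset(list_of_tfrecords, 1)  # batch size 1
    next_element = iterator.get_next()
    inputs = next_element[0]
    targets = [next_element[1], next_element[2]]
    return inputs, targets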
Example #5
def create_tfrecords(annotations_file,
                     class_file,
                     backbone_model="resnet50",
                     image_min_side=800,
                     size=1,
                     savedir="./"):
    """
    Args:
        annotations_file: path to 6 column data in form
            image_path, xmin, ymin, xmax, ymax, label
        backbone_model: A keras retinanet backbone
        image_min_side: resized image object minimum size
        size: Number of images per tfrecord
        savedir: dir path to save tfrecords files

    Returns:
        written_files: A list of path names of written tfrecords
    """
    memory_used = []

    # Image preprocess function
    backbone = models.backbone(backbone_model)

    # filebase name
    image_basename = os.path.splitext(os.path.basename(annotations_file))[0]

    # Syntax checks
    # Check annotations file only JPEG, PNG, GIF, or BMP are allowed.
    # df = pd.read_csv(annotations_file,
    # names=["image_path","xmin","ymin","xmax","ymax","label"])
    # df['FileType'] = df.image_path.str.split('.').str[-1].str.lower()
    # bad_files = df[~df['FileType'].isin(["jpeg","jpg","png","gif","bmp"])]

    # if not bad_files.empty:
    # raise ValueError("Check annotations file, only JPEG, PNG, GIF, or BMP are allowed,
    # {} incorrect files found /n {}: ".format(bad_files.shape[0],bad_files.head()))

    # Check dtypes, cannot use pandas, or will coerce in the presence of NAs
    with open(annotations_file, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        row = next(reader)
        if row[1].count(".") > 0:
            raise ValueError(
                "Annotation files should be headerless with integer box, {} is not a int"
                .format(row[1]))

    # Create generator - because of how retinanet yields data,
    # batch_size should always be 1. Shape problems in the future?
    train_generator = CSVGenerator(annotations_file,
                                   class_file,
                                   batch_size=1,
                                   image_min_side=image_min_side,
                                   preprocess_image=backbone.preprocess_image)

    # chunk size
    indices = np.arange(train_generator.size())
    chunks = [
        indices[i * size:(i * size) + size]
        for i in range(ceil(len(indices) / size))
    ]

    written_files = []
    for chunk in chunks:
        # Create tfrecord dataset and save it for output
        fname = os.path.join(savedir, "{}_{}.tfrecord".format(image_basename, chunk[0]))
        written_files.append(fname)
        writer = tf.io.TFRecordWriter(fname)
        images = []
        regression_targets = []
        class_targets = []
        filename = []
        original_image = []
        for i in chunk:
            # Original image
            original_image.append(train_generator.load_image(i))

            batch = train_generator.__getitem__(i)

            # split into images and targets
            inputs, targets = batch

            # grab image, assume batch size of 1, squeeze
            images.append(inputs[0, ...])

            # Grab anchor targets
            regression_batch, labels_batch = targets

            # grab regression anchors
            # regression_batch: batch that contains bounding-box regression targets
            # for an image & anchor states (np.array of shape (batch_size, N, 4 + 1),
            # where N is the number of anchors for an image, the first 4 columns
            # define regression targets for (x1, y1, x2, y2) and the
            # last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
            regression_anchors = regression_batch[0, ...]
            regression_targets.append(regression_anchors)

            # grab class labels - squeeze out batch size
            # From retinanet: labels_batch: batch that contains labels & anchor states
            # (np.array of shape (batch_size, N, num_classes + 1),
            # where N is the number of anchors for an image and the last column defines
            # the anchor state (-1 for ignore, 0 for bg, 1 for fg).
            labels = labels_batch[0, ...]
            print("Label shape is: {}".format(labels.shape))
            class_targets.append(labels)

            # append filename by looking at group index
            current_index = train_generator.groups[i][0]

            # Grab filename and append to the full path
            fname = train_generator.image_names[current_index]
            fname = os.path.join(train_generator.base_dir, fname)

            filename.append(fname)

        for image, regression_target, class_target, fname, orig_image in zip(
                images, regression_targets, class_targets, filename,
                original_image):
            tf_example = create_tf_example(image, regression_target,
                                           class_target, fname, orig_image)
            writer.write(tf_example.SerializeToString())

        memory_used.append(psutil.virtual_memory().used / 2**30)

    #plt.plot(memory_used)
    #plt.title('Evolution of memory')
    #plt.xlabel('iteration')
    #plt.ylabel('memory used (GB)')
    #plt.savefig(os.path.join(savedir, "memory.png"))

    return written_files
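
A hedged call sketch; the csv paths are placeholders, and the annotations
file must be headerless with integer box coordinates, as the check above
enforces.

written_files = create_tfrecords(
    annotations_file="annotations.csv",  # image_path, xmin, ymin, xmax, ymax, label
    class_file="classes.csv",
    backbone_model="resnet50",
    image_min_side=800,
    size=100,  # images per .tfrecord shard
    savedir="records/")
print("Wrote {} tfrecord shards".format(len(written_files)))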
Example #6
def main(forest_object,
         args=None,
         input_type="fit_generator",
         list_of_tfrecords=None,
         comet_experiment=None):
    """
    Main Training Loop
    Args:
        forest_object: a deepforest class object
        args: Keras retinanet argparse
        list_of_tfrecords: list of tfrecords to parse
        input_type: "fit_generator" or "tfrecord" input type
    """
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if args.gpu:
        setup_gpu(args.gpu)

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # data input
    if input_type == "fit_generator":
        # create the generators
        train_generator, validation_generator = create_generators(
            args, backbone.preprocess_image)

        # placeholder target tensor for creating models
        targets = None

    elif input_type == "tfrecord":
        # Create tensorflow iterators
        iterator = tfrecords.create_dataset(list_of_tfrecords, args.batch_size)
        next_element = iterator.get_next()

        # Split into inputs and targets
        inputs = next_element[0]
        targets = [next_element[1], next_element[2]]

        validation_generator = None

    else:
        raise ValueError(
            "{} input type is invalid. Only 'tfrecord' or 'for_generator' "
            "input types are accepted for model training".format(input_type))

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        anchor_params = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)
        prediction_model = retinanet_bbox(model=model,
                                          anchor_params=anchor_params)
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        if input_type == "fit_generator":
            num_of_classes = train_generator.num_classes()
        else:
            # number of classes from the deepforest label dict
            num_of_classes = len(forest_object.labels.keys())

        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.retinanet,
            num_classes=num_of_classes,
            weights=weights,
            multi_gpu=args.multi_gpu,
            freeze_backbone=args.freeze_backbone,
            lr=args.lr,
            config=args.config,
            targets=targets,
            freeze_layers=args.freeze_layers)

    # print model summary
    print(model.summary())

    # this lets the generator compute backbone layer shapes using the actual backbone model
    if 'vgg' in args.backbone or 'densenet' in args.backbone:
        train_generator.compute_shapes = make_shapes_callback(model)
        if validation_generator:
            validation_generator.compute_shapes = train_generator.compute_shapes

    # create the callbacks
    callbacks = create_callbacks(model, training_model, prediction_model,
                                 validation_generator, args, comet_experiment)

    if not args.compute_val_loss:
        validation_generator = None

    # start training
    if input_type == "fit_generator":
        history = training_model.fit_generator(
            generator=train_generator,
            steps_per_epoch=args.steps,
            epochs=args.epochs,
            verbose=1,
            callbacks=callbacks,
            workers=args.workers,
            use_multiprocessing=args.multiprocessing,
            max_queue_size=args.max_queue_size,
            validation_data=validation_generator)
    elif input_type == "tfrecord":

        # Fit model
        history = training_model.fit(x=inputs,
                                     steps_per_epoch=args.steps,
                                     epochs=args.epochs,
                                     callbacks=callbacks)
    else:
        raise ValueError(
            "{} input type is invalid. Only 'tfrecord' or 'for_generator' "
            "input types are accepted for model training".format(input_type))

    # Assign history to deepforest model class
    forest_object.history = history

    # return trained model
    return model, prediction_model, training_model
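
A hedged sketch of wiring this loop to the tfrecord input path, combining
the pieces above; the argument list is a placeholder, since the retinanet
parse_args signature is not shown here.

from deepforest import deepforest, tfrecords

forest = deepforest.deepforest()
records = tfrecords.create_tfrecords(
    annotations_file="annotations.csv",  # placeholder paths
    class_file="classes.csv",
    size=100,
    savedir="records/")

# argv is hypothetical; pass whatever the retinanet argparse expects
model, prediction_model, training_model = main(
    forest,
    args=["--epochs", "5"],
    input_type="tfrecord",
    list_of_tfrecords=records)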