# Example #1
    def read_data(self, mode="train", validation_split=False):
        """Build tf.data datasets from the tfrecords listed in the config.

        Args:
            mode: dataset mode forwarded to ``make_dataset.tf_dataset``.
            validation_split: if True, hold out the last 10% of the training
                tfrecords as a validation set. This overrides the evaluation
                config!
        """
        self.train_records = glob.glob(
            os.path.join(self.config["train"]["tfrecords"], "*.tfrecord"))

        # Hoist shared dataset settings out of the branches.
        batch_size = self.config["train"]["batch_size"]
        shuffle = self.config["train"]["shuffle"]
        cores = self.config["cpu_workers"]

        def _build(records):
            # Single place to construct a dataset with the shared settings.
            return make_dataset.tf_dataset(
                tfrecords=records,
                batch_size=batch_size,
                shuffle=shuffle,
                mode=mode,
                cores=cores)

        if validation_split:
            print("Splitting training set into train-test")
            record_series = pd.Series(self.train_records)
            # Deterministic head/tail split: first 90% of the file list is
            # train, the remainder is test — identical between runs.
            n_train = int(0.9 * record_series.shape[0])
            self.train_split_records = record_series.head(n_train).values
            self.test_split_records = record_series[
                ~record_series.isin(self.train_split_records)].values

            self.train_split = _build(self.train_split_records)
            self.val_split = _build(self.test_split_records)
        else:
            self.train_split = _build(self.train_records)

            # Honor the evaluation config only when no split was requested.
            self.val_split = None
            if self.config["evaluation"]["tfrecords"] is not None:
                self.test_records = glob.glob(
                    os.path.join(self.config["evaluation"]["tfrecords"],
                                 "*.tfrecord"))
                self.val_split = _build(self.test_records)
# Example #2
    def predict_raster(self, tfrecords, batch_size=1):
        """Predict a set of tfrecords and return per-pixel labels.

        Args:
            tfrecords: paths to tfrecords written in "predict" mode.
            batch_size: prediction batch size.

        Returns:
            DataFrame with columns label/row/col, sorted by (row, col),
            suitable for reassembling into a raster.
        """
        dataset = make_dataset.tf_dataset(
            tfrecords=tfrecords,
            batch_size=batch_size,
            shuffle=False,
            mode="predict",
            cores=self.config["cpu_workers"])

        softmax_batches = []
        rows = []
        cols = []
        for image, x, y in dataset:
            try:
                batch_scores = self.model.predict_on_batch(image)
            except tf.errors.OutOfRangeError:
                # NOTE(review): Python iteration over tf.data normally ends
                # without raising this; handler kept for parity with the
                # original behavior.
                print("Completed {} predictions".format(len(softmax_batches)))
            else:
                rows.append(x.numpy())
                cols.append(y.numpy())
                softmax_batches.append(batch_scores)

        # Collapse the batch lists and take the argmax class per pixel.
        scores = np.vstack(softmax_batches)
        rows = np.concatenate(rows)
        cols = np.concatenate(cols)
        labels = np.argmax(scores, 1)

        results = pd.DataFrame({
            "label": labels,
            "row": rows,
            "col": cols
        })
        return results.sort_values(by=["row", "col"])
    def predict_boxes(self, tfrecords, batch_size=1, majority_vote=True):
        """Predict class labels for a set of box tfrecords.

        Args:
            tfrecords: paths to tfrecords written in "box" mode.
            batch_size: prediction batch size.
            majority_vote: if True, collapse the per-sample labels of each
                box into its modal label.

        Returns:
            DataFrame of labels: one row per prediction (columns
            label/box_index), or one row per box when majority_vote is True.
        """
        # Consistency fix: call through make_dataset like predict_raster does.
        prediction_set = make_dataset.tf_dataset(
            tfrecords=tfrecords,
            batch_size=batch_size,
            shuffle=False,
            mode="box",
            cores=self.config["cpu_workers"])

        predictions = []
        indices = []
        for image, box_index in prediction_set:
            try:
                softmax_batch = self.model.predict_on_batch(image)
                predictions.append(softmax_batch)
                indices.append(box_index)
            except tf.errors.OutOfRangeError:
                print("Completed {} predictions".format(len(predictions)))

        #stack batches and decode argmax class ids into label strings
        predictions = np.vstack(predictions)
        predictions = np.argmax(predictions, 1)

        indices = np.concatenate(indices)
        labels = [self.config["class_labels"][x] for x in predictions]
        results = pd.DataFrame({"label": labels, "box_index": indices})

        if majority_vote:
            # BUG FIX: .agg on the grouped frame already yields a DataFrame,
            # which has no .to_frame(). Select the label column (a Series)
            # first so .to_frame() is valid.
            majority_results = results.groupby("box_index")["label"].agg(
                pd.Series.mode).to_frame()
            return majority_results
        else:
            return results
def test_tf_dataset_train(train_tfrecords, ground_truth_raster):
    """End-to-end check of the training tf.data pipeline against the raster.

    Verifies batch shape, then that one epoch yields exactly the non-zero
    raster pixels with matching label values and frequencies.
    """
    print(train_tfrecords)
    #Tensorflow encodes string as b bytes
    dataset = make_dataset.tf_dataset(train_tfrecords,
                                      batch_size=6,
                                      shuffle=False)

    # Spot-check two batches: full batch size, non-empty image data.
    # (labels unused here, so discard them)
    for data, _ in dataset.take(2):
        numpy_data = data.numpy()
        assert numpy_data.shape[0] == 6
        assert not numpy_data.sum() == 0

    #full dataset
    dataset = make_dataset.tf_dataset(train_tfrecords,
                                      batch_size=6,
                                      shuffle=False)

    counter = 0
    labels = []
    center_pixels = []
    for data, label in dataset:
        counter += data.shape[0]
        center_pixels.append(data[:, 4, 4, 0])
        labels.append(label)

    labels = np.vstack(labels)
    labels = np.argmax(labels, 1)

    center_pixels = np.concatenate(center_pixels)

    #one epoch should be every pixel in the raster minus the 0 label pixels
    # (close the raster handle promptly via context manager)
    with rasterio.open(ground_truth_raster) as src:
        raster = src.read()
    non_zero = raster[raster != 0]
    assert counter == len(non_zero)

    #The unique values should have the same indexing
    np.testing.assert_equal(np.unique(non_zero), np.unique(labels))

    #same frequency, take one value as an example
    assert len(raster[raster == 7]) == len(labels[labels == 7])
def test_tf_dataset_predict(predict_tfrecords):
    """Check the predict-mode pipeline yields one record per raster cell."""
    #Tensorflow encodes string as b bytes
    dataset = make_dataset.tf_dataset(predict_tfrecords,
                                      batch_size=20,
                                      shuffle=False,
                                      mode="predict")

    counter = 0
    # x/y coordinates are yielded but not needed for this count check;
    # the original converted them to numpy and discarded the results.
    for data, x, y in dataset:
        numpy_data = data.numpy()
        assert not numpy_data.sum() == 0
        counter += data.shape[0]

    # Fixture raster is presumably 25 x 50 cells — predict mode should
    # cover every one of them exactly once.
    assert counter == 25 * 50