# These test snippets assume the deepforest test-suite context, e.g.:
#   from deepforest import tfrecords
#   from keras_retinanet import models
#   from keras_retinanet.preprocessing import csv_generator
# find_tf_filenames is a test helper that globs for the created .tfrecord files.


def test_lengths(config):
    """Assert that a csv generator and tfrecords create the same number of images in an epoch"""
    created_records = tfrecords.create_tfrecords(
        annotations_file="tests/output/testfile_tfrecords.csv",
        class_file="tests/output/classes.csv",
        image_min_side=config["image-min-side"],
        backbone_model=config["backbone"],
        size=100,
        savedir="tests/output/")

    # tfdata
    tf_filenames = find_tf_filenames(path="tests/output/*.tfrecord")

    # keras generator
    backbone = models.backbone(config["backbone"])
    generator = csv_generator.CSVGenerator(
        csv_data_file="tests/output/testfile_tfrecords.csv",
        csv_class_file="tests/output/classes.csv",
        image_min_side=config["image-min-side"],
        preprocess_image=backbone.preprocess_image,
    )
    fit_generator_length = generator.size()
    assert len(tf_filenames) == fit_generator_length
def test_create_tfrecords(config):
    """This test is in flux because the tensorflow and cv2 resize methods are not
    identical: https://jricheimer.github.io/tensorflow/2019/02/11/resize-confusion/
    """
    created_records = tfrecords.create_tfrecords(
        annotations_file="tests/data/testfile_tfrecords.csv",
        class_file="tests/data/classes.csv",
        image_min_side=config["image-min-side"],
        backbone_model=config["backbone"],
        size=100,
        savedir="tests/data/")
    assert os.path.exists("tests/data/testfile_tfrecords_0.tfrecord")
    return created_records
def prepare_tfdataset(annotations):
    records_created = tfrecords.create_tfrecords(
        annotations_file=annotations,
        class_file="tests/data/classes.csv",
        image_min_side=800,
        backbone_model="resnet50",
        size=100,
        savedir="tests/data/")
    assert os.path.exists("tests/data/testfile_deepforest_0.tfrecord")
    return records_created
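# Sketch (illustrative, not from the test suite): the list of tfrecord paths
# returned by prepare_tfdataset can be turned into batched tensors with
# tfrecords.create_tensors, the same call used in the pretraining script below.
def example_consume_records():
    records = prepare_tfdataset("tests/data/testfile_deepforest.csv")
    inputs, targets = tfrecords.create_tensors(records)
    return inputs, targets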
# Pretraining script snippet; assumes the usual imports, e.g.
#   from comet_ml import Experiment
#   from deepforest import deepforest, tfrecords, utilities
annotations = pd.read_csv(
    BASE_PATH + "pretraining/crops/pretraining.csv",
    names=["image_path", "xmin", "ymin", "xmax", "ymax", "label"])

# Select the annotations for a single image
annotations = annotations[annotations.image_path ==
                          "2019_DELA_5_423000_3601000_image_0.jpg"].copy()

# Generate tfrecords
annotations_file = BASE_PATH + "pretraining/crops/test.csv"
annotations.to_csv(annotations_file, header=False, index=False)

class_file = utilities.create_classes(annotations_file)
tfrecords_path = tfrecords.create_tfrecords(annotations_file, class_file, size=1)
print("Created {} tfrecords: {}".format(len(tfrecords_path), tfrecords_path))

inputs, targets = tfrecords.create_tensors(tfrecords_path)

## Fit generator ##
comet_experiment = Experiment(api_key="ypQZhYfs3nSyKzOfz13iuJpj2",
                              project_name="deepforest",
                              workspace="bw4sz")
comet_experiment.log_parameter("Type", "testing")
comet_experiment.log_parameter("input_type", "fit_generator")

# Create model
fitgen_model = deepforest.deepforest()
fitgen_model.config["epochs"] = 1
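# Plausible continuation (a sketch, not from the original script): train the
# fit_generator model for the single epoch configured above, logging to comet.
# The train() keyword names here are assumed from the deepforest 0.x API.
fitgen_model.train(annotations=annotations_file,
                   input_type="fit_generator",
                   comet_experiment=comet_experiment)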
# Assumes the usual script imports, e.g.
#   import numpy as np, pandas as pd
#   from math import ceil
#   from dask.distributed import wait
def generate_hand_annotations(DEBUG, BASE_PATH, FILEPATH, SIZE, config, dask_client):
    # Generate tfrecords
    dirname = "hand_annotations/"
    annotations_file = BASE_PATH + dirname + "crops/hand_annotations.csv"
    class_file = utilities.create_classes(annotations_file)

    if DEBUG:
        tfrecords.create_tfrecords(annotations_file=annotations_file,
                                   class_file=class_file,
                                   image_min_side=config["image-min-side"],
                                   backbone_model=config["backbone"],
                                   size=SIZE,
                                   savedir=FILEPATH + dirname + "tfrecords/")
    else:
        # Collect annotation files for each tile
        df = pd.read_csv(annotations_file,
                         names=["image_path", "xmin", "ymin", "xmax", "ymax", "label"])

        # Enforce dtype, as there might be errors
        df.xmin = df.xmin.astype(pd.Int64Dtype())
        df.ymin = df.ymin.astype(pd.Int64Dtype())
        df.xmax = df.xmax.astype(pd.Int64Dtype())
        df.ymax = df.ymax.astype(pd.Int64Dtype())

        # Randomize rows
        df = df.sample(frac=1)

        # Split the dataframe into chunks of images and write each chunk to file
        images = df.image_path.unique()
        indices = np.arange(len(images))
        size = 500
        chunk_list = []
        for i in range(ceil(len(indices) / size)):
            image_indices = indices[i * size:(i * size) + size]
            selected_images = images[image_indices]
            split_frame = df[df.image_path.isin(selected_images)]
            filename = BASE_PATH + dirname + "crops/hand_annotations{}.csv".format(i)
            split_frame.to_csv(filename, header=False, index=False)
            chunk_list.append(filename)

        print("Created {} files to create tfrecords".format(len(chunk_list)))

        # Apply create_tfrecords to each chunk on the dask cluster
        futures = dask_client.map(tfrecords.create_tfrecords,
                                  chunk_list,
                                  class_file=class_file,
                                  image_min_side=config["image-min-side"],
                                  backbone_model=config["backbone"],
                                  size=SIZE,
                                  savedir=FILEPATH + dirname + "tfrecords/")
        wait(futures)

        for future in futures:
            try:
                local_annotations = future.result()
            except Exception as e:
                print("future {} failed with {}".format(future, e))
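# Usage sketch for the helper above (illustrative only: the cluster setup,
# paths, and config values are placeholders, not taken from the original pipeline).
if __name__ == "__main__":
    from dask.distributed import Client

    client = Client()  # local cluster; a remote scheduler works the same way
    config = {"image-min-side": 800, "backbone": "resnet50"}
    generate_hand_annotations(DEBUG=True,
                              BASE_PATH="/path/to/data/",
                              FILEPATH="/path/to/output/",
                              SIZE=100,
                              config=config,
                              dask_client=client)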