Example #1
def test_get_all_images_with_pattern(pattern, tmpdir):
    """It gets all image files"""
    image_extensions = ["tif", "tiff", "jpg", "jpeg", "png"]
    a_dir = tmpdir.mkdir("image_dir")
    for number in range(2):
        for ext in image_extensions:
            file = a_dir.join(f"file{pattern}_{number}.{ext}")
            file.ensure()
    for ext in image_extensions:
        matches = core.get_image_files_from_pattern(a_dir, None, ext)
        files = [f for f in Path(a_dir).rglob(f"*.{ext}") if f.is_file()]
        assert len(list(matches)) == len(files) == 2

    matches = core.get_image_files_from_pattern(a_dir, pattern, None)
    files = [f for f in Path(a_dir).rglob("**/*") if f.is_file()]
    assert len(files) == 2 * len(image_extensions)
    assert len(list(matches)) == 2 * len(image_extensions)  # all image files matched
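The `pattern` argument above is supplied by pytest parametrization in the original test module, and the test relies on `pathlib.Path` plus the `core` module under test. A minimal, hypothetical harness is sketched below; the import path and the example pattern values are assumptions, not taken from the source.

# Hypothetical test harness -- import path and pattern values are assumptions.
import pytest
from pathlib import Path

from flyswot import core  # assumed location of get_image_files_from_pattern


@pytest.mark.parametrize("pattern", ["fs", "bw"])  # illustrative values only
def test_get_all_images_with_pattern(pattern, tmpdir):
    ...  # body as shown in Example #1
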
Example #2
def predict_directory(
    directory: Path = typer.Argument(
        ...,
        readable=True,
        resolve_path=True,
        help="Directory to start searching for images from",
    ),
    csv_save_dir: Path = typer.Argument(
        ...,
        writable=True,
        resolve_path=True,
        help="Directory used to store the csv report",
    ),
    pattern: str = typer.Option("fs", help="Pattern used to filter image filenames"),
    bs: int = typer.Option(16, help="Batch Size"),
    image_format: str = typer.Option(
        ".tif",
        help="Image format for flyswot to use for predictions, defaults to `*.tif`",
    ),
    model_name: str = typer.Option(
        "latest", help="Which model flyswot should use for making predictions"
    ),
    model_path: str = None,
):
    """Predicts against all images stored under DIRECTORY which match PATTERN in the filename.

    By default searches for filenames containing 'fs'.

    Creates a CSV report saved to `csv_save_dir`.
    """
    start_time = time.perf_counter()
    model_dir = models.ensure_model_dir()
    model = models.ensure_model(model_dir)
    # if model_name != "latest" and not model_path:
    #     model_parts = models._get_model_parts(Path(model_dir / Path(model_name)))
    # if model_name != "latest" and model_path:
    #     model_parts = models._get_model_parts(Path(model_path / Path(model_name)))
    onnxinference = OnnxInferenceSession(model.model, model.vocab)
    files = sorted(core.get_image_files_from_pattern(directory, pattern, image_format))
    check_files(files, pattern, directory)
    typer.echo(f"Found {len(files)} files matching {pattern} in {directory}")
    csv_fname = create_csv_fname(csv_save_dir)
    with typer.progressbar(length=len(files)) as progress:
        images_checked = 0
        for i, batch in enumerate(itertoolz.partition_all(bs, files)):
            batch_predictions = onnxinference.predict_batch(batch, bs)
            if i == 0:  # pragma: no cover
                create_csv_header(batch_predictions, csv_fname)
            write_batch_preds_to_csv(batch_predictions, csv_fname)
            progress.update(len(batch))
            images_checked += len(batch)
    delta = timedelta(seconds=time.perf_counter() - start_time)
    print_inference_summary(
        str(delta), pattern, directory, csv_fname, image_format, images_checked
    )
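
For context, a typer command like `predict_directory` is normally registered on a `typer.Typer()` app and driven from the command line, with option names derived from the parameter names (`--pattern`, `--bs`, `--image-format`). The wiring below is only a hedged sketch; the app object, command name, and file paths are assumptions rather than flyswot's actual CLI layout.

# Hypothetical wiring -- app/command names and paths are placeholders.
import typer

app = typer.Typer()
app.command(name="directory")(predict_directory)  # register the function as a sub-command

if __name__ == "__main__":
    app()

# Example invocation (paths are placeholders):
#   python cli.py directory ./images ./reports --pattern fs --bs 32 --image-format .tif
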
Example #3
def test_get_all_images(ext, tmpdir):
    """It gets all image files"""
    a_dir = tmpdir.mkdir("image_dir")
    for number in range(50):
        file = a_dir.join(f"file_{number}.{ext}")
        file.ensure()
    for i in range(5):  # create 25 files in 5 subfolders
        a_sub_dir = a_dir.mkdir(f"{i}_dir")
        for j in range(5):
            file = a_sub_dir.join(f"file_{j}.{ext}")
            file.ensure()
    matches = core.get_image_files_from_pattern(a_dir, None, None)
    files = [f for f in Path(a_dir).rglob("**/*") if f.is_file()]
    assert len(files) == 50 + 25
    assert len(list(matches)) == 50 + 25  # all image files matched
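
Taken together, these tests pin down the contract of `core.get_image_files_from_pattern`: given a directory, an optional filename pattern, and an optional extension, it yields matching image files recursively. The sketch below is a hypothetical reconstruction of that contract for illustration only, not the actual flyswot implementation.

# Hypothetical reconstruction of the helper's observed behaviour.
from pathlib import Path


def get_image_files_from_pattern(directory, pattern=None, image_format=None):
    """Yield files under *directory*, optionally filtered by a filename pattern
    and/or an extension (with or without a leading dot)."""
    suffix = (image_format or "*").lstrip(".")
    name_part = f"*{pattern}*" if pattern else "*"
    # rglob searches recursively, matching the behaviour the tests assert on.
    yield from (f for f in Path(directory).rglob(f"{name_part}.{suffix}") if f.is_file())
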
Example #4
def test_filter_with_front(fname, tmpdir):
    """It filters files from pattern"""
    a_dir = tmpdir.mkdir("image_dir")
    for number in range(50):
        file = a_dir.join(f"file_{fname}_{number}.tif")
        file.ensure()
        file2 = a_dir.join(f"file_{number}.jpg")
        file2.ensure()
    for i in range(5):  # create 25 files in 5 subfolders
        a_sub_dir = a_dir.mkdir(f"{i}_dir")
        for j in range(5):
            file = a_sub_dir.join(f"file_{fname}_{j}.tif")
            file.ensure()
    matches = core.get_image_files_from_pattern(a_dir, fname, ".tif")
    files = [f for f in Path(a_dir).rglob("**/*") if f.is_file()]
    assert len(files) == (50 * 2) + 25
    assert len(list(matches)) == 50 + 25
Example #5
def predict_directory(
    directory: Path = typer.Argument(
        ...,
        readable=True,
        resolve_path=True,
        help="Directory to start searching for images from",
    ),
    csv_save_dir: Path = typer.Argument(
        ...,
        writable=True,
        resolve_path=True,
        help="Directory used to store the csv report",
    ),
    model_id: str = typer.Option(
        "flyswot/convnext-tiny-224_flyswot",
        help="The model flyswot should use for making predictions",
    ),
    pattern: str = typer.Option(
        None, help="Pattern used to filter image filenames"
    ),
    bs: int = typer.Option(16, help="Batch Size"),
    image_formats: List[str] = typer.Option(
        default=[".tif"],
        help="Image format(s) to check",
    ),
):
    """Predicts against all images stored under DIRECTORY which match PATTERN in the filename.

    By default no filename pattern filter is applied; all images with a matching extension are checked.

    Creates a CSV report saved to `csv_save_dir`.
    """
    start_time = time.perf_counter()
    huggingfaceinference = HuggingFaceInferenceSession(model=model_id)
    files = sorted(
        itertoolz.concat(
            core.get_image_files_from_pattern(directory, pattern, image_format)
            for image_format in image_formats
        )
    )
    check_files(files, pattern, directory)
    if not pattern:
        pattern = "any pattern"
    print(
        f"Found {len(files)} files matching {pattern} in {directory} with extension(s) {image_formats}"
    )
    csv_fname = create_csv_fname(csv_save_dir)
    corrupt_images, images_checked = predict_files(
        files,
        inference_session=huggingfaceinference,
        bs=bs,
        csv_fname=csv_fname)
    if corrupt_images:
        print(corrupt_images)
    delta = timedelta(seconds=time.perf_counter() - start_time)
    print_inference_summary(
        str(delta),
        pattern,
        directory,
        csv_fname,
        image_formats,
        images_checked,
        model_id,
    )
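
This newer version gathers files for several extensions with `itertoolz.concat`, while the batching in Example #2 uses `itertoolz.partition_all`; both come from the toolz library. The standalone snippet below just illustrates those two helpers with placeholder file names; it is not part of flyswot.

# Standalone illustration of the toolz helpers used in these examples.
from toolz import itertoolz

per_format = [
    ["a.tif", "b.tif"],  # e.g. results for ".tif"
    ["c.jpg"],           # e.g. results for ".jpg"
]
files = sorted(itertoolz.concat(per_format))        # flatten into one list
print(files)                                        # ['a.tif', 'b.tif', 'c.jpg']
print(list(itertoolz.partition_all(2, files)))      # [('a.tif', 'b.tif'), ('c.jpg',)]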