def find_prediction_file(self, prediction_path: pathlib.Path) -> None:
        """Locate the single prediction file matching this truth file.

        A candidate file's stem must contain the truth image's numeric id and,
        when this truth file is attribute-specific, the attribute id as well.
        Sets ``self.prediction_file`` on success.

        Raises:
            ScoreException: when zero or multiple candidate files match.
        """
        # e.g. 'ISIC_0012345' -> '0012345'
        image_number: str = self.image_id.split('_')[1]

        def matches(candidate: pathlib.Path) -> bool:
            # The attribute id is only required when this truth file has one;
            # this replaces two near-duplicate comprehensions.
            if image_number not in candidate.stem:
                return False
            return (not self.attribute_id) or (self.attribute_id in candidate.stem)

        prediction_file_candidates = [
            prediction_file
            for prediction_file in prediction_path.iterdir()
            if matches(prediction_file)
        ]

        if not prediction_file_candidates:
            raise ScoreException(
                f'No matching submission for: {self.truth_file.name}')
        elif len(prediction_file_candidates) > 1:
            raise ScoreException(
                f'Multiple matching submissions for: {self.truth_file.name}')

        self.prediction_file = prediction_file_candidates[0]
def score(truth_path: pathlib.Path,
          prediction_path: pathlib.Path) -> ScoresType:
    """Score the single submitted prediction CSV against the ground truth CSV."""
    # Locate the ground truth CSV within the truth directory
    truth_file = next(
        (candidate for candidate in truth_path.iterdir()
         if re.match(r'^ISIC.*GroundTruth\.csv$', candidate.name)),
        None)
    if truth_file is None:
        raise ScoreException('Internal error, truth file could not be found.')

    prediction_files = [
        candidate for candidate in prediction_path.iterdir()
        if candidate.suffix.lower() == '.csv'
    ]
    if len(prediction_files) > 1:
        raise ScoreException(
            'Multiple prediction files submitted. Exactly one CSV file should be submitted.'
        )
    if not prediction_files:
        raise ScoreException(
            'No prediction files submitted. Exactly one CSV file should be submitted.'
        )
    prediction_file = prediction_files[0]

    with truth_file.open('rb') as truth_file_stream, \
            prediction_file.open('rb') as prediction_file_stream:
        return compute_metrics(truth_file_stream, prediction_file_stream)
# Example #3
def ensure_manuscript(prediction_path: pathlib.Path):
    """Ensure exactly one PDF manuscript is present in the submission directory.

    Raises:
        ScoreException: when zero or multiple PDF files are found.
    """
    manuscript_file_count = sum(
        manuscript_file.suffix.lower() == '.pdf'
        for manuscript_file in prediction_path.iterdir())
    # Message wording fixed: 'must included' -> 'must be included'
    if manuscript_file_count > 1:
        raise ScoreException(
            'Multiple PDFs submitted. Exactly one PDF file, containing the descriptive manuscript, '
            'must be included in the submission.')
    elif manuscript_file_count < 1:
        raise ScoreException(
            'No PDF submitted. Exactly one PDF file, containing the descriptive manuscript, '
            'must be included in the submission.')
def extract_zip(zip_path: pathlib.Path,
                output_path: pathlib.Path,
                flatten: bool = True) -> None:
    """Extract a zip file, optionally flattening it into a single directory."""
    try:
        with zipfile.ZipFile(zip_path) as archive:
            if not flatten:
                archive.extractall(output_path)
                return

            for member_info in archive.infolist():
                member_name = member_info.filename

                # Ignore Mac OS X metadata
                if member_name.startswith('__MACOSX'):
                    continue

                # Directory entries have an empty basename; skip them
                base_name = os.path.basename(member_name)
                if not base_name:
                    continue

                destination = output_path / base_name
                with archive.open(member_info) as source, \
                        destination.open('wb') as sink:
                    shutil.copyfileobj(source, sink)
    except zipfile.BadZipfile as e:
        raise ScoreException(
            f'Could not read ZIP file "{zip_path.name}": {str(e)}.')
 def load_prediction_image(self) -> None:
     """Load the prediction image and verify it matches the truth image's size."""
     self.prediction_image = load_segmentation_image(self.prediction_file)
     prediction_dims = self.prediction_image.shape[0:2]
     truth_dims = self.truth_image.shape[0:2]
     if prediction_dims != truth_dims:
         raise ScoreException(
             f'Image {self.prediction_file.name} has dimensions '
             f'{prediction_dims}; expected {truth_dims}.'
         )
# Example #6
def validate_rows(truth_probabilities: pd.DataFrame,
                  prediction_probabilities: pd.DataFrame):
    """
    Ensure prediction rows correspond to truth rows.

    Fail when the prediction is missing rows, or carries extra rows, relative
    to the truth.
    """
    row_checks = (
        # Truth rows absent from the prediction
        ('Missing images in CSV: {}.',
         truth_probabilities.index.difference(prediction_probabilities.index)),
        # Prediction rows absent from the truth
        ('Extra images in CSV: {}.',
         prediction_probabilities.index.difference(truth_probabilities.index)),
    )
    for message_template, unmatched_images in row_checks:
        if not unmatched_images.empty:
            raise ScoreException(
                message_template.format(unmatched_images.tolist()))
# Example #7
def unzip_all(
    input_path: pathlib.Path,
    allow_manuscript_directory: bool = False
) -> Tuple[pathlib.Path, tempfile.TemporaryDirectory]:
    """
    Extract / copy all files in directory.

    Validates that the path contains exactly one file. Optionally allow an 'Abstract' directory to
    exist which contains exactly one manuscript file. Return a path to the extracted content.
    """
    files = [entry for entry in input_path.iterdir() if entry.is_file()]
    directories = [entry for entry in input_path.iterdir() if entry.is_dir()]

    if len(files) > 1:
        raise ScoreException(
            'Multiple files submitted. Exactly one ZIP file should be submitted.'
        )
    if not files:
        raise ScoreException(
            'No files submitted. Exactly one ZIP file should be submitted.')

    submission_file = files[0]
    manuscript_file = None

    if not allow_manuscript_directory:
        # Expect only files
        if directories:
            raise ScoreException('Internal error: unexpected directory found.')
    elif len(directories) > 1:
        raise ScoreException('Internal error: multiple directories found.')
    elif len(directories) == 1:
        manuscript_dir = directories[0]
        if manuscript_dir.name != 'Abstract':
            raise ScoreException(
                f'Internal error: unexpected directory found: {manuscript_dir.name}.'
            )

        manuscript_candidates = list(manuscript_dir.iterdir())
        if not manuscript_candidates:
            raise ScoreException('Empty manuscript directory found.')
        if len(manuscript_candidates) > 1:
            raise ScoreException(
                'Multiple files found in manuscript directory.')
        manuscript_file = manuscript_candidates[0]

    output_temp_dir = tempfile.TemporaryDirectory()
    output_path = pathlib.Path(output_temp_dir.name)

    if submission_file.suffix.lower() == '.zip':
        extract_zip(submission_file, output_path)
    else:
        shutil.copy(submission_file, output_path)

    if manuscript_file is not None:
        shutil.copy(manuscript_file, output_path)

    return output_path, output_temp_dir
def load_segmentation_image(image_path: pathlib.Path) -> np.ndarray:
    """Load a segmentation image as a NumPy array, given a file path.

    Raises:
        ScoreException: if the file cannot be decoded or is not greyscale.
    """
    try:
        image = Image.open(str(image_path))
    except Exception as e:
        raise ScoreException(
            f'Could not decode image "{image_path.name}" because: "{str(e)}"')

    # Context manager closes the underlying file handle, which the original
    # code leaked; rebinding 'image' below does not affect which object the
    # 'with' statement closes.
    with image:
        if image.mode == '1':
            # NumPy crashes if a 1-bit (black and white) image is directly
            # coerced to an array
            image = image.convert('L')

        if image.mode != 'L':
            raise ScoreException(
                f'Image {image_path.name} is not single-channel (greyscale).')

        return np.array(image)
def assert_binary_image(image: np.ndarray, image_path: pathlib.Path):
    """Ensure a NumPy array image is binary, correcting if possible.

    Images whose only values are {0, high} for a single non-255 high value are
    rescaled to {0, 255}. Returns the (possibly corrected) image.

    Raises:
        ScoreException: when the image cannot be corrected to {0, 255}.
    """
    image_values = set(np.unique(image))
    if image_values <= {0, 255}:
        # Expected values
        pass
    elif len(image_values) <= 2:
        # Binary image with high value other than 255 can be corrected.
        high_value = (image_values - {0}).pop()
        # Rescale in floating point: the previous in-place 'image /= high_value'
        # raised a TypeError for integer (e.g. uint8) arrays, since true
        # division cannot be stored back into an integer dtype.
        scaled = image.astype(np.float64) / high_value * 255
        if set(np.unique(scaled)) > {0, 255}:
            raise ScoreException(
                f'Image {image_path.name} contains values other than 0 and 255.'
            )
        # Preserve the caller's dtype in the returned image
        image = scaled.astype(image.dtype)
    else:
        raise ScoreException(
            f'Image {image_path.name} contains values other than 0 and 255.')

    return image
    def parse_image_id(self):
        """Extract the image id (and optional attribute id) from the truth file name.

        Sets self.image_id to the 'ISIC_#######' identifier, and
        self.attribute_id to the attribute name (e.g. 'pigment_network') or
        None when the truth file is not attribute-specific.

        Raises:
            ScoreException: when no image id can be found in the file name.
        """
        image_id_match = re.search(r'ISIC_[0-9]{7}', self.truth_file.stem)
        if not image_id_match:
            raise ScoreException(
                f'Internal error: unknown ground truth file: {self.truth_file.name}.'
            )
        self.image_id = image_id_match.group(0)

        attribute_id_match = re.search(r'attribute_([a-z_]+)',
                                       self.truth_file.stem)
        # Always assign, so self.attribute_id is defined even for truth files
        # without an attribute (previously it was only set on a match).
        self.attribute_id = (attribute_id_match.group(1)
                             if attribute_id_match else None)
# Example #11
def load_segmentation_image(image_path: pathlib.Path) -> np.ndarray:
    """Load a segmentation image as a NumPy array, given a file path."""
    try:
        with Image.open(image_path) as image:
            # Force PIL to read the pixel data now; NumPy sometimes fails to
            # get the "__array_interface__" from a lazily-loaded image
            image.load()

            if image.mode == '1':
                # A 1-bit (black and white) image must be widened first, since
                # NumPy crashes when coercing it directly to an array
                image = image.convert('L')

            if image.mode != 'L':
                raise ScoreException(
                    f'Image {image_path.name} is not single-channel (greyscale).'
                )

            pixels = np.asarray(image)
    except UnidentifiedImageError:
        raise ScoreException(f'Could not decode image "{image_path.name}"')

    return pixels
# Example #12
def score_all(
    truth_input_path: pathlib.Path,
    prediction_input_path: pathlib.Path,
    task_num: int,
    require_manuscript: bool,
):
    """Score a submission against ground truth and print Covalic-format JSON.

    Extracts both inputs, optionally requires a manuscript PDF, dispatches to
    the task-specific scorer, and prints the resulting scores.

    Raises:
        ScoreException: for an unknown task number or any scoring failure.
    """
    # Unzip zip files contained in the input folders
    truth_path, truth_temp_dir = unzip_all(truth_input_path)
    try:
        prediction_path, prediction_temp_dir = unzip_all(
            prediction_input_path, allow_manuscript_directory=True)
        try:
            if require_manuscript:
                ensure_manuscript(prediction_path)

            if task_num == 1:
                score = task1.score
            elif task_num == 2:
                score = task2.score
            elif task_num == 3:
                score = task3.score
            else:
                raise ScoreException(
                    f'Internal error: unknown ground truth phase number: {task_num}.')
            scores: ScoresType = score(truth_path, prediction_path)

            # Output in Covalic format
            print(
                json.dumps([{
                    'dataset':
                    dataset,
                    'metrics': [{
                        'name': metric_name,
                        'value': metric_value
                    } for metric_name, metric_value in metrics.items()],
                } for dataset, metrics in scores.items()]))
        finally:
            # Previously the temp dirs were only cleaned up on success,
            # leaking them whenever scoring raised
            prediction_temp_dir.cleanup()
    finally:
        truth_temp_dir.cleanup()
# Example #13
def parse_csv(csv_file_stream: TextIO, categories: pd.Index) -> pd.DataFrame:
    """Parse and validate a prediction CSV into a probabilities DataFrame.

    The result is indexed by image id and has exactly the given categories as
    columns, in order, with float values in [0.0, 1.0].

    Raises:
        ScoreException: for malformed or invalid CSV content.
    """
    try:
        probabilities = pd.read_csv(csv_file_stream, header=0, index_col=False)
    except pd.errors.ParserError as e:
        # TODO: Test this case
        raise ScoreException(f'Could not parse CSV: "{str(e)}"')

    if 'image' not in probabilities.columns:
        raise ScoreException('Missing column in CSV: "image".')

    # Pandas represents strings as 'O' (object)
    if probabilities['image'].dtype != np.dtype('O'):
        # Coercing to 'U' (unicode) ensures that even NaN values are converted;
        # however, the resulting type is still 'O'
        probabilities['image'] = probabilities['image'].astype(np.dtype('U'))

    # Strip a trailing '.jpg' (any case) from image ids. 'regex=True' must be
    # explicit: pandas >= 2.0 defaults to literal matching and rejects 'case'
    # for non-regex patterns.
    probabilities['image'] = probabilities['image'].str.replace(r'\.jpg$',
                                                                '',
                                                                case=False,
                                                                regex=True)

    if not probabilities['image'].is_unique:
        duplicate_images = probabilities['image'][
            probabilities['image'].duplicated()].unique()
        raise ScoreException(
            f'Duplicate image rows detected in CSV: {duplicate_images.tolist()}.'
        )

    # The duplicate check is the same as performed by 'verify_integrity'
    probabilities.set_index('image',
                            drop=True,
                            inplace=True,
                            verify_integrity=False)

    missing_columns = categories.difference(probabilities.columns)
    if not missing_columns.empty:
        raise ScoreException(
            f'Missing columns in CSV: {missing_columns.tolist()}.')

    extra_columns = probabilities.columns.difference(categories)
    if not extra_columns.empty:
        raise ScoreException(
            f'Extra columns in CSV: {extra_columns.tolist()}.')

    # sort by the order in categories
    probabilities = probabilities.reindex(categories, axis='columns')

    missing_rows = probabilities[probabilities.isnull().any(
        axis='columns')].index
    if not missing_rows.empty:
        raise ScoreException(
            f'Missing value(s) in CSV for images: {missing_rows.tolist()}.')

    non_float_columns = probabilities.dtypes[probabilities.dtypes.apply(
        lambda x: x != np.float64)].index
    if not non_float_columns.empty:
        raise ScoreException(
            f'CSV contains non-floating-point value(s) in columns: {non_float_columns.tolist()}.'
        )
    # TODO: identify specific failed rows

    out_of_range_rows = probabilities[probabilities.applymap(
        lambda x: x < 0.0 or x > 1.0).any(axis='columns')].index
    if not out_of_range_rows.empty:
        raise ScoreException(
            f'Values in CSV are outside the interval [0.0, 1.0] for images: '
            f'{out_of_range_rows.tolist()}.')

    # TODO: fail on extra columns in data rows

    return probabilities