Example #1
0
    def _validate_data(self, data):
        """Check whether data is valid, try to convert with best effort if not"""
        if isinstance(data, pd.DataFrame):
            # TODO(zhreshold): allow custom label column without this renaming trick
            if self._label != 'label' and self._label in data.columns:
                # data is deepcopied so it's okay to overwrite directly
                data = data.rename(columns={
                    'label': '_unused_label',
                    self._label: 'label'
                },
                                   errors='ignore')

        if not (hasattr(data, 'classes') and hasattr(data, 'to_mxnet')):
            if isinstance(data, pd.DataFrame):
                # raw dataframe, try to add metadata automatically
                if 'label' in data.columns and 'image' in data.columns:
                    # check image relative/abs path is valid
                    sample = data.iloc[0]['image']
                    if not os.path.isfile(sample):
                        raise OSError(
                            f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.'
                        )
                    logger.log(
                        20,
                        'Converting raw DataFrame to ImagePredictor.Dataset...'
                    )
                    infer_classes = sorted(data.label.unique().tolist())
                    logger.log(
                        20,
                        f'Detected {len(infer_classes)} unique classes: {infer_classes}'
                    )
                    instruction = 'train_data = ImagePredictor.Dataset(train_data, classes=["foo", "bar"])'
                    logger.log(
                        20,
                        f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}'
                    )
                    data = _ImageClassification.Dataset(data,
                                                        classes=infer_classes)
                else:
                    err_msg = 'Unable to convert raw DataFrame to ImagePredictor Dataset, ' + \
                              '`image` and `label` columns are required.' + \
                              'You may visit `https://auto.gluon.ai/stable/tutorials/image_prediction/dataset.html` ' + \
                              'for details.'
                    raise AttributeError(err_msg)
            else:
                raise TypeError(
                    f"Unable to process dataset of type: {type(data)}")
        elif isinstance(data, _ImageClassification.Dataset):
            assert 'label' in data.columns
            assert hasattr(data, 'classes')
            # check whether classes are outdated, no action required if all unique labels is subset of `classes`
            unique_labels = sorted(data['label'].unique().tolist())
            if not (all(ulabel in data.classes for ulabel in unique_labels)):
                data = _ImageClassification.Dataset(data,
                                                    classes=unique_labels)
                logger.log(20, f'Reset labels to {unique_labels}')
        if len(data) < 1:
            raise ValueError('Empty dataset.')
        return data
Example #2
0
 def _validate_data(self, data):
     """Check whether data is valid, try to convert with best effort if not"""
     if not (hasattr(data, 'classes') and hasattr(data, 'to_mxnet')):
         if isinstance(data, pd.DataFrame):
             # raw dataframe, try to add metadata automatically
             if 'label' in data.columns and 'image' in data.columns:
                 # check image relative/abs path is valid
                 sample = data.iloc[0]['image']
                 if not os.path.isfile(sample):
                     raise OSError(f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.')
                 logger.log(20, 'Converting raw DataFrame to ImagePredictor.Dataset...')
                 infer_classes = list(data.label.unique().tolist())
                 logger.log(20, f'Detected {len(infer_classes)} unique classes: {infer_classes}')
                 instruction = 'train_data = ImagePredictor.Dataset(train_data, classes=["foo", "bar"])'
                 logger.log(20, f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}')
                 data = _ImageClassification.Dataset(data, classes=infer_classes)
             else:
                 err_msg = 'Unable to convert raw DataFrame to ImagePredictor Dataset, ' + \
                           '`image` and `label` columns are required.' + \
                           'You may visit `https://auto.gluon.ai/stable/tutorials/image_prediction/dataset.html` ' + \
                           'for details.'
                 raise AttributeError(err_msg)
     if len(data) < 1:
         raise ValueError('Empty dataset.')
     return data