def _validate_data(self, data):
    """Check whether data is valid, try to convert with best effort if not.

    A raw ``pd.DataFrame`` is converted to ``_ImageClassification.Dataset``
    with the class list inferred from the sorted unique values of its label
    column. An existing ``_ImageClassification.Dataset`` is rebuilt only when
    its ``label`` column contains values missing from ``data.classes``.

    Parameters
    ----------
    data : pd.DataFrame or dataset-like object
        Either a raw DataFrame holding ``image`` and label columns, or an
        object that already exposes both ``classes`` and ``to_mxnet``.

    Returns
    -------
    The validated (and possibly converted) dataset.

    Raises
    ------
    TypeError
        If ``data`` is not a DataFrame and lacks ``classes``/``to_mxnet``.
    AttributeError
        If a raw DataFrame lacks the ``image`` or ``label`` column.
    ValueError
        If the dataset contains no rows.
    OSError
        If the first image path of a raw DataFrame is not an existing file.
    """
    if isinstance(data, pd.DataFrame):
        # TODO(zhreshold): allow custom label column without this renaming trick
        if self._label != 'label' and self._label in data.columns:
            # rename() returns a new frame, so the caller's object is untouched.
            # A pre-existing `label` column is parked under `_unused_label`.
            data = data.rename(
                columns={'label': '_unused_label', self._label: 'label'},
                errors='ignore',
            )

    has_metadata = hasattr(data, 'classes') and hasattr(data, 'to_mxnet')
    if not has_metadata:
        if not isinstance(data, pd.DataFrame):
            raise TypeError(f"Unable to process dataset of type: {type(data)}")
        # raw dataframe, try to add metadata automatically
        if 'label' not in data.columns or 'image' not in data.columns:
            err_msg = (
                'Unable to convert raw DataFrame to ImagePredictor Dataset, '
                '`image` and `label` columns are required. '
                'You may visit `https://auto.gluon.ai/stable/tutorials/image_prediction/dataset.html` '
                'for details.'
            )
            raise AttributeError(err_msg)
        # Reject an empty frame before sampling row 0; otherwise `iloc[0]`
        # raises IndexError instead of the intended "Empty dataset." error.
        if len(data) < 1:
            raise ValueError('Empty dataset.')
        # check image relative/abs path is valid (only the first row is probed)
        sample = data.iloc[0]['image']
        if not os.path.isfile(sample):
            raise OSError(
                f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.'
            )
        # 20 is the numeric value of logging.INFO
        logger.log(20, 'Converting raw DataFrame to ImagePredictor.Dataset...')
        infer_classes = sorted(data['label'].unique().tolist())
        logger.log(20, f'Detected {len(infer_classes)} unique classes: {infer_classes}')
        instruction = 'train_data = ImagePredictor.Dataset(train_data, classes=["foo", "bar"])'
        logger.log(
            20,
            f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}'
        )
        data = _ImageClassification.Dataset(data, classes=infer_classes)
    elif isinstance(data, _ImageClassification.Dataset):
        assert 'label' in data.columns
        assert hasattr(data, 'classes')
        # check whether classes are outdated, no action required if all
        # unique labels are a subset of `classes`
        unique_labels = sorted(data['label'].unique().tolist())
        if not all(ulabel in data.classes for ulabel in unique_labels):
            data = _ImageClassification.Dataset(data, classes=unique_labels)
            logger.log(20, f'Reset labels to {unique_labels}')

    if len(data) < 1:
        raise ValueError('Empty dataset.')
    return data
def _validate_data(self, data):
    """Check whether data is valid, try to convert with best effort if not.

    A raw ``pd.DataFrame`` with ``image`` and ``label`` columns is converted
    to ``_ImageClassification.Dataset``. Any other input is only checked for
    being non-empty and is returned as-is.

    Parameters
    ----------
    data : pd.DataFrame or dataset-like object
        Either a raw DataFrame, or an object that already exposes both
        ``classes`` and ``to_mxnet``.

    Returns
    -------
    The validated (and possibly converted) dataset.

    Raises
    ------
    AttributeError
        If a raw DataFrame lacks the ``image`` or ``label`` column.
    ValueError
        If the dataset contains no rows.
    OSError
        If the first image path of a raw DataFrame is not an existing file.
    """
    has_metadata = hasattr(data, 'classes') and hasattr(data, 'to_mxnet')
    if not has_metadata and isinstance(data, pd.DataFrame):
        # raw dataframe, try to add metadata automatically
        if 'label' not in data.columns or 'image' not in data.columns:
            err_msg = (
                'Unable to convert raw DataFrame to ImagePredictor Dataset, '
                '`image` and `label` columns are required. '
                'You may visit `https://auto.gluon.ai/stable/tutorials/image_prediction/dataset.html` '
                'for details.'
            )
            raise AttributeError(err_msg)
        # Reject an empty frame before sampling row 0; otherwise `iloc[0]`
        # raises IndexError instead of the intended "Empty dataset." error.
        if len(data) < 1:
            raise ValueError('Empty dataset.')
        # check image relative/abs path is valid (only the first row is probed)
        sample = data.iloc[0]['image']
        if not os.path.isfile(sample):
            raise OSError(f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.')
        # 20 is the numeric value of logging.INFO
        logger.log(20, 'Converting raw DataFrame to ImagePredictor.Dataset...')
        # Classes keep their order of first appearance in the label column
        # (not sorted), so class indices depend on row order.
        infer_classes = data['label'].unique().tolist()
        logger.log(20, f'Detected {len(infer_classes)} unique classes: {infer_classes}')
        instruction = 'train_data = ImagePredictor.Dataset(train_data, classes=["foo", "bar"])'
        logger.log(20, f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}')
        data = _ImageClassification.Dataset(data, classes=infer_classes)

    if len(data) < 1:
        raise ValueError('Empty dataset.')
    return data