def _validate_data(self, data): """Check whether data is valid, try to convert with best effort if not""" if isinstance(data, pd.DataFrame): # TODO(zhreshold): allow custom label column without this renaming trick if self._label != 'label' and self._label in data.columns: # data is deepcopied so it's okay to overwrite directly data = data.rename(columns={ 'label': '_unused_label', self._label: 'label' }, errors='ignore') if not (hasattr(data, 'classes') and hasattr(data, 'to_mxnet')): if isinstance(data, pd.DataFrame): # raw dataframe, try to add metadata automatically if 'label' in data.columns and 'image' in data.columns: # check image relative/abs path is valid sample = data.iloc[0]['image'] if not os.path.isfile(sample): raise OSError( f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.' ) logger.log( 20, 'Converting raw DataFrame to ImagePredictor.Dataset...' ) infer_classes = sorted(data.label.unique().tolist()) logger.log( 20, f'Detected {len(infer_classes)} unique classes: {infer_classes}' ) instruction = 'train_data = ImagePredictor.Dataset(train_data, classes=["foo", "bar"])' logger.log( 20, f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}' ) data = _ImageClassification.Dataset(data, classes=infer_classes) else: err_msg = 'Unable to convert raw DataFrame to ImagePredictor Dataset, ' + \ '`image` and `label` columns are required.' + \ 'You may visit `https://auto.gluon.ai/stable/tutorials/image_prediction/dataset.html` ' + \ 'for details.' 
raise AttributeError(err_msg) else: raise TypeError( f"Unable to process dataset of type: {type(data)}") elif isinstance(data, _ImageClassification.Dataset): assert 'label' in data.columns assert hasattr(data, 'classes') # check whether classes are outdated, no action required if all unique labels is subset of `classes` unique_labels = sorted(data['label'].unique().tolist()) if not (all(ulabel in data.classes for ulabel in unique_labels)): data = _ImageClassification.Dataset(data, classes=unique_labels) logger.log(20, f'Reset labels to {unique_labels}') if len(data) < 1: raise ValueError('Empty dataset.') return data
def test_time_out_image_classification():
    """Fit with a 30 second time limit and check the call returns in time.

    The configuration asks for 50 epochs; the assertion allows 180 seconds
    of overhead on top of the time limit.
    """
    from gluoncv.auto.tasks import ImageClassification

    budget_seconds = 30
    config = {'num_trials': 1, 'epochs': 50}
    task = ImageClassification(config)

    started = time.time()
    classifier = task.fit(IMAGE_CLASS_DATASET, time_limit=budget_seconds)
    elapsed = time.time() - started

    # check time_limit with a little bit overhead
    assert elapsed < budget_seconds + 180
def test_torch_image_classification_custom_net():
    """Fit ImageClassification with a timm resnet18 passed as `custom_net`.

    The network's `fc` layer is replaced by a 512 -> 4 linear layer before
    fitting for one trial and one epoch; the fit summary must then report a
    positive `valid_acc`, and prediction on the test set must run.
    """
    from gluoncv.auto.tasks import ImageClassification
    from timm import create_model
    import torch.nn as nn

    custom_model = create_model('resnet18')
    custom_model.fc = nn.Linear(512, 4)

    config = {
        'num_trials': 1,
        'epochs': 1,
        'custom_net': custom_model,
        'batch_size': 8,
    }
    task = ImageClassification(config)
    classifier = task.fit(IMAGE_CLASS_DATASET)

    summary = task.fit_summary()
    assert summary.get('valid_acc', 0) > 0

    test_result = classifier.predict(IMAGE_CLASS_TEST)
def test_image_classification():
    """Fit ImageClassification with the `resnet18_v1` model for one epoch.

    Checks that the fit summary reports a positive `valid_acc` and that
    prediction on the test set runs.
    """
    from gluoncv.auto.tasks import ImageClassification

    config = {
        'model': 'resnet18_v1',
        'num_trials': 1,
        'epochs': 1,
        'batch_size': 8,
    }
    task = ImageClassification(config)
    classifier = task.fit(IMAGE_CLASS_DATASET)

    summary = task.fit_summary()
    assert summary.get('valid_acc', 0) > 0

    test_result = classifier.predict(IMAGE_CLASS_TEST)
def test_image_classification_custom_net():
    """Fit ImageClassification with a model-zoo `resnet18_v1` as `custom_net`.

    Runs one trial for one epoch, checks that the fit summary reports a
    positive `valid_acc`, and that prediction on the test set runs.
    """
    from gluoncv.auto.tasks import ImageClassification
    from gluoncv.model_zoo import get_model

    custom_model = get_model('resnet18_v1')
    config = {
        'num_trials': 1,
        'epochs': 1,
        'custom_net': custom_model,
    }
    task = ImageClassification(config)
    classifier = task.fit(IMAGE_CLASS_DATASET)

    summary = task.fit_summary()
    assert summary.get('valid_acc', 0) > 0

    test_result = classifier.predict(IMAGE_CLASS_TEST)
def _validate_data(self, data): """Check whether data is valid, try to convert with best effort if not""" if not (hasattr(data, 'classes') and hasattr(data, 'to_mxnet')): if isinstance(data, pd.DataFrame): # raw dataframe, try to add metadata automatically if 'label' in data.columns and 'image' in data.columns: # check image relative/abs path is valid sample = data.iloc[0]['image'] if not os.path.isfile(sample): raise OSError(f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.') logger.log(20, 'Converting raw DataFrame to ImagePredictor.Dataset...') infer_classes = list(data.label.unique().tolist()) logger.log(20, f'Detected {len(infer_classes)} unique classes: {infer_classes}') instruction = 'train_data = ImagePredictor.Dataset(train_data, classes=["foo", "bar"])' logger.log(20, f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}') data = _ImageClassification.Dataset(data, classes=infer_classes) else: err_msg = 'Unable to convert raw DataFrame to ImagePredictor Dataset, ' + \ '`image` and `label` columns are required.' + \ 'You may visit `https://auto.gluon.ai/stable/tutorials/image_prediction/dataset.html` ' + \ 'for details.' raise AttributeError(err_msg) if len(data) < 1: raise ValueError('Empty dataset.') return data
def test_image_classification():
    """Fit ImageClassification for a single trial with default settings.

    Checks that the fit summary reports a positive `valid_acc` and that
    prediction on the test set runs.
    """
    from gluoncv.auto.tasks import ImageClassification

    task = ImageClassification({'num_trials': 1})
    classifier = task.fit(IMAGE_CLASS_DATASET)
    # `fit_summary` has to be called to obtain the summary mapping; reading
    # `.get` off the uncalled method would fail before the comparison runs.
    assert task.fit_summary().get('valid_acc', 0) > 0
    test_result = classifier.predict(IMAGE_CLASS_TEST)
'batch_size': ag.Int(4, 7), # [16, 32, 64, 128] 'momentum': ag.Real(0.85, 0.95), 'wd': ag.Real(1e-6, 1e-2, log=True), 'epochs': 15, 'num_trials': args.num_trials, 'search_strategy': 'bayesopt' } # specify learning task task = ImageClassification(config) # specify dataset dataset = Dataset.get(args.dataset) train_data, valid_data = dataset.split(0.8) # fit auto estimator classifier = task.fit(train_data, valid_data) # evaluate auto estimator top1, top5 = classifier.evaluate(valid_data) logging.info('evaluation: top1={}, top5={}'.format(top1, top5)) # save and load auto estimator classifier.save('classifier.pkl') classifier = ImageClassification.load('classifier.pkl')