Example 1
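The snippet relies on several imports and helpers from the SINGA-Auto examples. A hedged sketch of the likely preamble follows; the module paths are assumptions, so adjust them to your checkout:

# NOTE (assumption): import locations follow the SINGA-Auto examples layout.
from pprint import pprint
from singa_auto.constants import BudgetOption, ModelDependency  # assumed location
from singa_auto.model import utils  # assumed; provides utils.dataset.load_images
from examples.datasets.image_files.load_fashion_mnist import load_fashion_mnist
# gen_id, wait_until_train_job_has_stopped, get_predictor_host and
# make_predictions are helper functions defined alongside this example script.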
def quickstart(client, train_dataset_path, val_dataset_path, gpus, hours,
               query_paths):
    '''
    Conducts a full train-and-inference flow on the Fashion MNIST dataset with
    the models `SkDt` and `TfFeedForward` for the `IMAGE_CLASSIFICATION` task.
    '''

    task = 'IMAGE_CLASSIFICATION'

    # Randomly generate app & model names to avoid naming conflicts
    app_id = gen_id()
    app = 'image_classification_app_{}'.format(app_id)
    tf_model_name = 'TfFeedForward_{}'.format(app_id)
    sk_model_name = 'SkDt_{}'.format(app_id)

    print('Preprocessing datasets...')
    load_fashion_mnist(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Adding models "{}" and "{}" to SINGA-Auto...'.format(
        tf_model_name, sk_model_name))
    tf_model = client.create_model(
        tf_model_name,
        task,
        'examples/models/image_classification/TfFeedForward.py',
        'TfFeedForward',
        dependencies={ModelDependency.TENSORFLOW: '1.12.0'})
    pprint(tf_model)
    sk_model = client.create_model(
        sk_model_name,
        task,
        'examples/models/image_classification/SkDt.py',
        'SkDt',
        dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'})
    pprint(sk_model)
    model_ids = [tf_model['id'], sk_model['id']]

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))

    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset['id'],
                                        val_dataset['id'],
                                        budget,
                                        models=model_ids)
    pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    pprint(client.get_best_trials_of_train_job(app))

    print('Creating inference job for app "{}" on SINGA-Auto...'.format(app))
    pprint(client.create_inference_job(app))
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        raise Exception('Inference job has errored')
    print('Inference job is running!')

    print('Making predictions for query images:')
    print(query_paths)
    queries = utils.dataset.load_images(query_paths).tolist()
    predictions = make_predictions(client, predictor_host, queries)
    print('Predictions are:')
    print(predictions)

    print('Stopping inference job...')
    pprint(client.stop_inference_job(app))
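A minimal invocation sketch follows; the client constructor, host, port, credentials and file paths are placeholders (assumptions), not values taken from the example above:

if __name__ == '__main__':
    from singa_auto.client import Client  # assumed client entry point
    client = Client(admin_host='localhost', admin_port=3000)  # placeholder host/port
    client.login(email='superadmin@singaauto', password='singa_auto')  # placeholder credentials
    quickstart(client,
               train_dataset_path='data/fashion_mnist_train.zip',  # hypothetical path
               val_dataset_path='data/fashion_mnist_val.zip',      # hypothetical path
               gpus=0,
               hours=0.1,
               query_paths=['data/query_1.png'])                   # hypothetical query image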
Example 2
import os

def image_dataset_download_load_and_split(dataset_name,
                                          data_dir='data',
                                          min_image_size=None,
                                          max_image_size=None,
                                          mode=None,
                                          image_scale_size=None,
                                          dataset_types=('train', 'test')):
    '''
    Downloads an image dataset if it is not already present, then loads it,
    split into the requested dataset types.

    :param dataset_name: str. One of 'cifar10', 'fashion_mnist' or 'xray'
        (alias 'chest-xray-pneumonia').
    :param data_dir: directory that datasets are downloaded to or loaded from.
    :param min_image_size: optional lower bound on image size.
    :param max_image_size: optional upper bound on image size.
    :param mode: optional image mode, passed through to the readers.
    :param image_scale_size: optional size that images are rescaled to.
    :param dataset_types: dataset splits to load, e.g. ('train', 'test').
    :return: dict with keys 'x_<type>' and 'y_<type>' for each requested
        dataset type, where each x_* entry is a list of images as torch
        tensors and each y_* entry is the corresponding list of labels.
    '''
    # Validate the optional size bounds only when both are given
    if min_image_size is not None and max_image_size is not None:
        assert min_image_size <= max_image_size

    # Download the dataset if it is not prepared in the directory yet
    if dataset_name == 'cifar10':
        if not (os.path.exists(os.path.join(data_dir, 'cifar10_train.zip')) and
                os.path.exists(os.path.join(data_dir, 'cifar10_test.zip'))):
            print('Dataset downloading...')
            from examples.datasets.image_files.load_cifar10 import load_cifar10
            # Generates train, val and test datasets under `data_dir`; with
            # validation_split=0 the val dataset is empty.
            load_cifar10(limit=None, validation_split=0,
                         out_train_dataset_path=os.path.join(data_dir, 'cifar10_train.zip'),
                         out_val_dataset_path=os.path.join(data_dir, 'cifar10_val.zip'),
                         out_test_dataset_path=os.path.join(data_dir, 'cifar10_test.zip'),
                         out_meta_csv_path=os.path.join(data_dir, 'cifar10_meta.csv'))
    elif dataset_name == 'fashion_mnist':
        if not (os.path.exists(os.path.join(data_dir, 'fashion_mnist_train.zip')) and
                os.path.exists(os.path.join(data_dir, 'fashion_mnist_test.zip'))):
            print('Dataset downloading...')
            from examples.datasets.image_files.load_fashion_mnist import load_fashion_mnist
            # Generates train, val and test datasets under `data_dir`; with
            # validation_split=0 the val dataset is empty.
            load_fashion_mnist(limit=None, validation_split=0,
                               out_train_dataset_path=os.path.join(data_dir, 'fashion_mnist_train.zip'),
                               out_val_dataset_path=os.path.join(data_dir, 'fashion_mnist_val.zip'),
                               out_meta_csv_path=os.path.join(data_dir, 'fashion_mnist_meta.csv'),
                               out_test_dataset_path=os.path.join(data_dir, 'fashion_mnist_test.zip'))

    elif dataset_name in ('xray', 'chest-xray-pneumonia'):
        if not os.path.exists(os.path.join(data_dir, 'chest-xray-pneumonia.zip')):
            print('Dataset downloading...')
            import kaggle
            kaggle.api.authenticate()
            kaggle.api.dataset_download_files('paultimothymooney/chest-xray-pneumonia',
                                              path=data_dir, unzip=False)

    # Read the downloaded dataset zipfiles
    if dataset_name in ('xray', 'chest-xray-pneumonia'):
        datasets_loaded = processing_and_reading_downloaded_xray('chest-xray-pneumonia', 
                                                   data_dir, 
                                                   min_image_size=min_image_size, 
                                                   max_image_size=max_image_size, 
                                                   mode=mode, 
                                                   image_scale_size=image_scale_size)
    else:
        datasets_loaded = dict()
        for dataset_type in dataset_types:
            (x, y) = processing_and_reading_existing_datasets(dataset_name, 
                                                              data_dir=data_dir,  
                                                              min_image_size=min_image_size, 
                                                              max_image_size=max_image_size, 
                                                              mode=mode, 
                                                              image_scale_size=image_scale_size,
                                                              dataset_type=dataset_type)
            datasets_loaded.update({f'x_{dataset_type}': x, f'y_{dataset_type}': y})

    return datasets_loaded
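A short usage sketch; the scale size below is a hypothetical value, and the splits default to train and test:

datasets = image_dataset_download_load_and_split('fashion_mnist',
                                                 data_dir='data',
                                                 image_scale_size=28)  # hypothetical scale size
x_train, y_train = datasets['x_train'], datasets['y_train']
x_test, y_test = datasets['x_test'], datasets['y_test']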