Example #1
0
def train_densenet(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
        Starts a train job for model `PyDenseNetBc` on the CIFAR-10 dataset for IMAGE_CLASSIFICATION.
        Demonstrates hyperparameter tuning with distributed parameter sharing on SINGA-Auto.

        Steps, in order: prepare the dataset files with `load_cifar10`, upload the
        train & validation datasets, register the model, then create the train job.
        The function returns as soon as the train job is created; it does not wait for it.

        :param client: SINGA-Auto client used for every dataset / model / train job call
        :param train_dataset_path: path passed to `load_cifar10` and uploaded as the train dataset
        :param val_dataset_path: path passed to `load_cifar10` and uploaded as the validation dataset
        :param gpus: value of the `GPU_COUNT` budget option
        :param hours: value of the `TIME_HOURS` budget option
    '''
    task = 'IMAGE_CLASSIFICATION'

    # One generated ID is shared by the app name, the dataset names and the model name
    run_id = gen_id()
    app = 'cifar10_densenet_{}'.format(run_id)
    model_name = 'PyDenseNetBc_{}'.format(run_id)

    print('Preprocessing datasets...')
    load_cifar10(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset_name = '{}_train'.format(app)
    train_dataset = client.create_dataset(train_dataset_name, task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset_name = '{}_val'.format(app)
    val_dataset = client.create_dataset(val_dataset_name, task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Creating model...')
    model_dependencies = {
        ModelDependency.TORCH: '1.0.1',
        ModelDependency.TORCHVISION: '0.2.2'
    }
    model = client.create_model(
        name=model_name,
        task='IMAGE_CLASSIFICATION',
        model_file_path='examples/models/image_classification/PyDenseNetBc.py',
        model_class='PyDenseNetBc',
        dependencies=model_dependencies)
    pprint(model)

    print('Creating train job...')
    # Same key order as before: time budget first, then GPU count
    budget = {}
    budget[BudgetOption.TIME_HOURS] = hours
    budget[BudgetOption.GPU_COUNT] = gpus
    train_dataset_id = train_dataset['id']
    val_dataset_id = val_dataset['id']
    model_ids = [model['id']]
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset_id,
                                        val_dataset_id,
                                        budget,
                                        models=model_ids)
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
Example #2
0
def run_enas(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
        Starts a train job for model `TfEnas` on the CIFAR-10 dataset for IMAGE_CLASSIFICATION.
        Demonstrates architecture tuning with ENAS on SINGA-Auto.

        Steps, in order: prepare the dataset files with `load_cifar10`, upload the
        train & validation datasets, register the model, then create the train job.
        The function returns as soon as the train job is created; it does not wait for it.

        :param client: SINGA-Auto client used for every dataset / model / train job call
        :param train_dataset_path: path passed to `load_cifar10` and uploaded as the train dataset
        :param val_dataset_path: path passed to `load_cifar10` and uploaded as the validation dataset
        :param gpus: value of the `GPU_COUNT` budget option
        :param hours: value of the `TIME_HOURS` budget option
    '''
    task = 'IMAGE_CLASSIFICATION'

    # One generated ID is shared by the app name, the dataset names and the model name
    unique_suffix = gen_id()
    app = 'cifar10_enas_{}'.format(unique_suffix)
    model_name = 'TfEnas_{}'.format(unique_suffix)

    print('Preprocessing datasets...')
    load_cifar10(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset(
        '{}_train'.format(app),
        task,
        train_dataset_path,
    )
    pprint(train_dataset)
    val_dataset = client.create_dataset(
        '{}_val'.format(app),
        task,
        val_dataset_path,
    )
    pprint(val_dataset)

    print('Creating model...')
    model_spec = {
        'name': model_name,
        'task': 'IMAGE_CLASSIFICATION',
        'model_file_path': 'examples/models/image_classification/TfEnas.py',
        'model_class': 'TfEnas',
        'dependencies': {ModelDependency.TENSORFLOW: '1.12.0'},
    }
    # Expanded into the same keyword arguments the client call expects
    model = client.create_model(**model_spec)
    pprint(model)

    print('Creating train job...')
    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    dataset_ids = (train_dataset['id'], val_dataset['id'])
    train_job = client.create_train_job(app,
                                        task,
                                        dataset_ids[0],
                                        dataset_ids[1],
                                        budget,
                                        models=[model['id']])
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
Example #3
0
def quickstart(client, train_dataset_path, val_dataset_path, gpus, hours,
               query_paths):
    '''
        Conducts a full train-inference flow on the Fashion MNIST dataset with
        models `SkDt` and `TfFeedForward` for the task `IMAGE_CLASSIFICATION`.

        Steps, in order: prepare the dataset files with `load_fashion_mnist`, upload
        the train & validation datasets, register both models, create a train job
        and wait until it has stopped, list its best trials, create an inference
        job, make predictions for the query images, and stop the inference job.

        Once the inference job is confirmed running, it is always stopped before
        this function exits, even if loading the queries or predicting fails.

        :param client: SINGA-Auto client used for every dataset / model / job call
        :param train_dataset_path: path passed to `load_fashion_mnist` and uploaded as the train dataset
        :param val_dataset_path: path passed to `load_fashion_mnist` and uploaded as the validation dataset
        :param gpus: value of the `GPU_COUNT` budget option
        :param hours: value of the `TIME_HOURS` budget option
        :param query_paths: image paths loaded with `utils.dataset.load_images` and sent as queries
        :raises Exception: if no predictor host is obtained for the inference job
    '''

    task = 'IMAGE_CLASSIFICATION'

    # Randomly generate app & model names to avoid naming conflicts
    app_id = gen_id()
    app = 'image_classification_app_{}'.format(app_id)
    tf_model_name = 'TfFeedForward_{}'.format(app_id)
    sk_model_name = 'SkDt_{}'.format(app_id)

    print('Preprocessing datasets...')
    load_fashion_mnist(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Adding models "{}" and "{}" to SINGA-Auto...'.format(
        tf_model_name, sk_model_name))
    tf_model = client.create_model(
        tf_model_name,
        task,
        'examples/models/image_classification/TfFeedForward.py',
        'TfFeedForward',
        dependencies={ModelDependency.TENSORFLOW: '1.12.0'})
    pprint(tf_model)
    sk_model = client.create_model(
        sk_model_name,
        task,
        'examples/models/image_classification/SkDt.py',
        'SkDt',
        dependencies={ModelDependency.SCIKIT_LEARN: '0.20.0'})
    pprint(sk_model)
    model_ids = [tf_model['id'], sk_model['id']]

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))

    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset['id'],
                                        val_dataset['id'],
                                        budget,
                                        models=model_ids)
    pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    pprint(client.get_best_trials_of_train_job(app))

    print('Creating inference job for app "{}" on SINGA-Auto...'.format(app))
    pprint(client.create_inference_job(app))
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        raise Exception('Inference job has errored')
    print('Inference job is running!')

    # The inference job is live from here on. Stop it in `finally` so that a
    # failure while loading the query images or predicting does not leave it
    # running.
    try:
        print('Making predictions for query images:')
        print(query_paths)
        queries = utils.dataset.load_images(query_paths).tolist()
        predictions = make_predictions(client, predictor_host, queries)
        print('Predictions are:')
        print(predictions)
    finally:
        print('Stopping inference job...')
        pprint(client.stop_inference_job(app))