예제 #1
0
def run_pos_tagging(client, gpus):
    '''
    Runs a sample train-inference flow for the task `POS_TAGGING` on Rafiki.

    Registers the `BigramHmm` and `PyBiLstm` models, creates a train job on
    the PTB train/test archives referenced by URL, waits until that job has
    stopped, lists its best trials, then creates an inference job, sends two
    tokenised sentences to it and stops it. Progress is printed to stdout.

    :param client: Client object on which all API calls are made
    :param gpus: Value stored under `BudgetType.GPU_COUNT` in the train budget
    :raises Exception: If no predictor host is obtained for the inference job
    '''
    task = TaskType.POS_TAGGING

    # A random suffix keeps app & model names from clashing across runs
    suffix = gen_id()
    app = 'pos_tagging_app_{}'.format(suffix)
    hmm_name = 'BigramHmm_{}'.format(suffix)
    lstm_name = 'PyBiLstm_{}'.format(suffix)

    adding_msg = 'Adding models "{}" and "{}" to Rafiki...'.format(
        hmm_name, lstm_name)
    print(adding_msg)
    hmm_model = client.create_model(
        hmm_name,
        task,
        'examples/models/pos_tagging/BigramHmm.py',
        'BigramHmm',
        dependencies={})
    lstm_model = client.create_model(
        lstm_name,
        task,
        'examples/models/pos_tagging/PyBiLstm.py',
        'PyBiLstm',
        dependencies={ModelDependency.PYTORCH: '0.4.1'})
    model_ids = [model['id'] for model in (hmm_model, lstm_model)]

    print('Creating train job for app "{}" on Rafiki...'.format(app))
    budget = {
        BudgetType.MODEL_TRIAL_COUNT: 5,
        BudgetType.GPU_COUNT: gpus,
    }
    train_uri = 'https://github.com/nginyc/rafiki-datasets/blob/master/pos_tagging/ptb_for_pos_tagging_train.zip?raw=true'
    test_uri = 'https://github.com/nginyc/rafiki-datasets/blob/master/pos_tagging/ptb_for_pos_tagging_test.zip?raw=true'
    train_job = client.create_train_job(
        app, task, train_uri, test_uri, budget, models=model_ids)
    pprint.pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    best_trials = client.get_best_trials_of_train_job(app)
    pprint.pprint(best_trials)

    print('Creating inference job for app "{}" on Rafiki...'.format(app))
    inference_job = client.create_inference_job(app)
    pprint.pprint(inference_job)
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        # Without a predictor host there is nothing to send queries to
        raise Exception('Inference job has errored or stopped')
    print('Inference job is running!')

    print('Making predictions for queries:')
    # Each query is one sentence, already split into tokens
    first_sentence = ['Ms.', 'Haag', 'plays', 'Elianti', '18', '.']
    second_sentence = [
        'The', 'luxury', 'auto', 'maker', 'last', 'year', 'sold', '1,214',
        'cars', 'in', 'the', 'U.S.'
    ]
    queries = [first_sentence, second_sentence]
    print(queries)
    predictions = make_predictions(client, predictor_host, queries)
    print('Predictions are:')
    print(predictions)

    print('Stopping inference job...')
    stopped_job = client.stop_inference_job(app)
    pprint.pprint(stopped_job)
예제 #2
0
def run_speech_recognition(client, train_dataset_path, val_dataset_path, gpus,
                           hours):
    '''
    Starts training of the `TfDeepSpeech` model for the task `SPEECH_RECOGNITION`.

    Uploads the train and validation datasets, registers the model and creates
    a train job. The function does not wait for the job; its last message
    points to the Web Admin for monitoring.

    :param client: Client object on which all API calls are made
    :param train_dataset_path: Path handed to `create_dataset` for the train dataset
    :param val_dataset_path: Path handed to `create_dataset` for the validation dataset
    :param gpus: Value stored under `BudgetOption.GPU_COUNT` in the train budget
    :param hours: Value stored under `BudgetOption.TIME_HOURS` in the train budget
    '''
    task = 'SPEECH_RECOGNITION'

    # A random suffix keeps app & model names from clashing across runs
    suffix = gen_id()
    app = 'speech_recognition_app_{}'.format(suffix)
    model_name = 'TfDeepSpeech_{}'.format(suffix)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_name = '{}_train'.format(app)
    train_dataset = client.create_dataset(train_name, task, train_dataset_path)
    pprint(train_dataset)
    val_name = '{}_val'.format(app)
    val_dataset = client.create_dataset(val_name, task, val_dataset_path)
    pprint(val_dataset)

    print('Adding models "{}" to SINGA-Auto...'.format(model_name))
    model = client.create_model(
        model_name,
        task,
        'examples/models/speech_recognition/TfDeepSpeech.py',
        'TfDeepSpeech',
        docker_image=IMAGE_TFDEEPSPEECH,
        dependencies={
            ModelDependency.TENSORFLOW: '1.12.0',
            ModelDependency.DS_CTCDECODER: '0.6.0-alpha.4',
        },
    )
    pprint(model)

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))
    budget = {
        BudgetOption.TIME_HOURS: hours,
        BudgetOption.GPU_COUNT: gpus,
    }
    train_id = train_dataset['id']
    val_id = val_dataset['id']
    train_job = client.create_train_job(
        app, task, train_id, val_id, budget, models=[model['id']])
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
예제 #3
0
def run_pos_tagging(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
    Conducts a full train-inference flow on the Penn Treebank sample dataset with
    models `BigramHmm` and `PyBiLstm` for the task `POS_TAGGING`.

    The flow is: prepare the sample datasets, upload them, register both
    models, run a train job and wait until it has stopped, list the best
    trials, start an inference job, send two tokenised sentences to it and
    stop it. Progress is printed to stdout.

    :param client: Client object on which all API calls are made
    :param train_dataset_path: Path given to `load_sample_ptb` and then
        uploaded as the train dataset
    :param val_dataset_path: Path given to `load_sample_ptb` and then
        uploaded as the validation dataset
    :param gpus: Value stored under `BudgetOption.GPU_COUNT` in the train budget
    :param hours: Value stored under `BudgetOption.TIME_HOURS` in the train budget
    :raises Exception: If no predictor host is obtained for the inference job
    '''

    task = 'POS_TAGGING'

    # Randomly generate app & model names to avoid naming conflicts
    app_id = gen_id()
    app = 'pos_tagging_app_{}'.format(app_id)
    bihmm_model_name = 'BigramHmm_{}'.format(app_id)
    py_model_name = 'PyBiLstm_{}'.format(app_id)

    # Prepare the datasets exactly once, before uploading them; running the
    # preprocessing again after the upload cannot affect the uploaded datasets.
    print('Preprocessing datasets...')
    load_sample_ptb(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Adding models "{}" and "{}" to SINGA-Auto...'.format(
        bihmm_model_name, py_model_name))
    bihmm_model = client.create_model(
        bihmm_model_name,
        task,
        'examples/models/pos_tagging/BigramHmm.py',
        'BigramHmm',
        dependencies={})
    pprint(bihmm_model)
    py_model = client.create_model(
        py_model_name,
        task,
        'examples/models/pos_tagging/PyBiLstm.py',
        'PyBiLstm',
        dependencies={ModelDependency.TORCH: '0.4.1'})
    pprint(py_model)
    model_ids = [bihmm_model['id'], py_model['id']]

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))
    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset['id'],
                                        val_dataset['id'],
                                        budget,
                                        models=model_ids)
    pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    pprint(client.get_best_trials_of_train_job(app))

    print('Creating inference job for app "{}" on SINGA-Auto...'.format(app))
    pprint(client.create_inference_job(app))
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        # Without a predictor host there is nothing to send queries to
        raise Exception('Inference job has errored or stopped')
    print('Inference job is running!')

    print('Making predictions for queries:')
    # Each query is one sentence, already split into tokens
    queries = [['Ms.', 'Haag', 'plays', 'Elianti', '18', '.'],
               [
                   'The', 'luxury', 'auto', 'maker', 'last', 'year', 'sold',
                   '1,214', 'cars', 'in', 'the', 'U.S.'
               ]]
    print(queries)
    predictions = make_predictions(client, predictor_host, queries)
    print('Predictions are:')
    print(predictions)

    print('Stopping inference job...')
    pprint(client.stop_inference_job(app))
예제 #4
0
def run_tabular_regression(client,
                           csv_file_url,
                           gpus,
                           hours,
                           features=None,
                           target=None,
                           queries=None):
    '''
    Runs a sample full train-inference flow for the task `TABULAR_REGRESSION`.

    Prepares train/validation CSV files under `data/`, uploads them, registers
    the `XgbReg` model, runs a train job and waits until it has stopped, lists
    the best trials, starts an inference job, optionally sends `queries` to it
    and finally stops it. Progress is printed to stdout.

    :param client: Client object on which all API calls are made
    :param csv_file_url: Passed to `load` together with the two output CSV paths
    :param gpus: Value stored under `BudgetOption.GPU_COUNT` in the train budget
    :param hours: Value stored under `BudgetOption.TIME_HOURS` in the train budget
    :param features: Forwarded to the train job as `train_args['features']`
    :param target: Forwarded to the train job as `train_args['target']`
    :param queries: Queries to predict on; the prediction step is skipped when `None`
    :raises Exception: If no predictor host is obtained for the inference job
    '''
    task = 'TABULAR_REGRESSION'

    # A random suffix keeps app & model names from clashing across runs
    suffix = gen_id()
    app = 'tabular_regression_app_{}'.format(suffix)
    model_name = 'XgbReg_{}'.format(suffix)
    train_csv = 'data/{}_train.csv'.format(app)
    val_csv = 'data/{}_val.csv'.format(app)

    print('Preprocessing dataset...')
    load(csv_file_url, train_csv, val_csv)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_name = '{}_train'.format(app)
    train_dataset = client.create_dataset(train_name, task, train_csv)
    pprint(train_dataset)
    val_name = '{}_val'.format(app)
    val_dataset = client.create_dataset(val_name, task, val_csv)
    pprint(val_dataset)

    print('Adding models "{}" to SINGA-Auto...'.format(model_name))
    model = client.create_model(
        model_name,
        task,
        'examples/models/tabular_regression/XgbReg.py',
        'XgbReg',
        dependencies={ModelDependency.XGBOOST: '0.90'})
    pprint(model)

    print('Creating train job for app "{}" on SINGA-Auto...'.format(app))
    budget = {
        BudgetOption.TIME_HOURS: hours,
        BudgetOption.GPU_COUNT: gpus,
    }
    train_id = train_dataset['id']
    val_id = val_dataset['id']
    model_ids = [model['id']]
    # Column selection is handed to the model through the train arguments
    train_args = {'features': features, 'target': target}
    train_job = client.create_train_job(
        app,
        task,
        train_id,
        val_id,
        budget,
        models=model_ids,
        train_args=train_args)
    pprint(train_job)

    print('Waiting for train job to complete...')
    print('This might take a few minutes')
    wait_until_train_job_has_stopped(client, app)
    print('Train job has been stopped')

    print('Listing best trials of latest train job for app "{}"...'.format(app))
    best_trials = client.get_best_trials_of_train_job(app)
    pprint(best_trials)

    print('Creating inference job for app "{}" on SINGA-Auto...'.format(app))
    inference_job = client.create_inference_job(app)
    pprint(inference_job)
    predictor_host = get_predictor_host(client, app)
    if not predictor_host:
        # Without a predictor host there is nothing to send queries to
        raise Exception('Inference job has errored or stopped')
    print('Inference job is running!')

    # Predictions are optional: only made when the caller supplied queries
    if queries is not None:
        print('Making predictions for queries:')
        print(queries)
        predictions = make_predictions(client, predictor_host, queries)
        print('Predictions are:')
        print(predictions)

    print('Stopping inference job...')
    stopped_job = client.stop_inference_job(app)
    pprint(stopped_job)