Example #1
def train_densenet(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
        Conducts training of model `PyDenseNetBc` on the CIFAR-10 dataset for IMAGE_CLASSIFICATION.
        Demonstrates hyperparameter tuning with distributed parameter sharing on SINGA-Auto.
    '''
    task = 'IMAGE_CLASSIFICATION'

    app_id = gen_id()
    app = 'cifar10_densenet_{}'.format(app_id)
    model_name = 'PyDenseNetBc_{}'.format(app_id)

    print('Preprocessing datasets...')
    load_cifar10(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Creating model...')
    model = client.create_model(
        name=model_name,
        task=task,
        model_file_path='examples/models/image_classification/PyDenseNetBc.py',
        model_class='PyDenseNetBc',
        dependencies={
            ModelDependency.TORCH: '1.0.1',
            ModelDependency.TORCHVISION: '0.2.2'
        })
    pprint(model)

    print('Creating train job...')
    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset['id'],
                                        val_dataset['id'],
                                        budget,
                                        models=[model['id']])
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
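
This snippet assumes the caller has already built an authenticated SINGA-Auto `Client` and that the usual example imports are in scope. Below is a minimal driver sketch; the module paths, host/port defaults, and credentials are assumptions based on the SINGA-Auto examples layout, not part of the snippet above, so verify them against your checkout:

# Minimal driver sketch -- import paths and credentials are assumptions
# based on the SINGA-Auto examples layout, not shown in the snippet above.
from pprint import pprint

from singa_auto.client import Client
from singa_auto.constants import BudgetOption, ModelDependency
from examples.datasets.image_files.load_cifar10 import load_cifar10
from examples.scripts.utils import gen_id  # assumed helper location

client = Client(admin_host='localhost', admin_port=3000)  # assumed defaults
client.login(email='superadmin@singaauto', password='singa_auto')  # placeholder credentials

train_densenet(client,
               train_dataset_path='data/cifar10_train.zip',
               val_dataset_path='data/cifar10_val.zip',
               gpus=1,
               hours=12)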
Example #2
def run_enas(client, train_dataset_path, val_dataset_path, gpus, hours):
    '''
        Conducts training of model `TfEnas` on the CIFAR-10 dataset for IMAGE_CLASSIFICATION.
        Demonstrates architecture tuning with ENAS on SINGA-Auto.
    '''
    task = 'IMAGE_CLASSIFICATION'

    app_id = gen_id()
    app = 'cifar10_enas_{}'.format(app_id)
    model_name = 'TfEnas_{}'.format(app_id)

    print('Preprocessing datasets...')
    load_cifar10(train_dataset_path, val_dataset_path)

    print('Creating & uploading datasets onto SINGA-Auto...')
    train_dataset = client.create_dataset('{}_train'.format(app), task,
                                          train_dataset_path)
    pprint(train_dataset)
    val_dataset = client.create_dataset('{}_val'.format(app), task,
                                        val_dataset_path)
    pprint(val_dataset)

    print('Creating model...')
    model = client.create_model(
        name=model_name,
        task=task,
        model_file_path='examples/models/image_classification/TfEnas.py',
        model_class='TfEnas',
        dependencies={ModelDependency.TENSORFLOW: '1.12.0'})
    pprint(model)

    print('Creating train job...')
    budget = {BudgetOption.TIME_HOURS: hours, BudgetOption.GPU_COUNT: gpus}
    train_job = client.create_train_job(app,
                                        task,
                                        train_dataset['id'],
                                        val_dataset['id'],
                                        budget,
                                        models=[model['id']])
    pprint(train_job)

    print('Monitor the train job on SINGA-Auto Web Admin')
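
Note that `create_train_job` only submits the job; both examples end by pointing you at the Web Admin UI to watch progress until the budget (`TIME_HOURS` / `GPU_COUNT`) is exhausted. If you would rather block in a script, a polling sketch like the one below works, assuming the client exposes a `get_train_job(app)` returning a dict with a 'status' field and terminal statuses named 'STOPPED' / 'ERRORED' (method and status names are assumptions, check them against your SINGA-Auto client version):

import time

def wait_until_train_job_has_stopped(client, app, poll_secs=60):
    # Poll the train job until it reaches a terminal state.
    # `get_train_job` and the status names are assumptions -- verify
    # against the client API of your SINGA-Auto version.
    while True:
        train_job = client.get_train_job(app)
        status = train_job.get('status')
        print('Train job status: {}'.format(status))
        if status in ('STOPPED', 'ERRORED'):
            return train_job
        time.sleep(poll_secs)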
Example #3
def image_dataset_download_load_and_split(dataset_name, data_dir='data/', min_image_size=None, max_image_size=None, mode=None, image_scale_size=None, dataset_types=('train', 'test')):
    '''
    Downloads an image dataset into `data_dir` if it is not already there, then loads and splits it.

    :param dataset_name: str. One of 'cifar10', 'fashion_mnist', or 'xray' / 'chest-xray-pneumonia'.
    :param data_dir: directory that datasets are downloaded to or loaded from; expected to end
        with a path separator, since file names are appended to it.
    :param min_image_size, max_image_size, mode, image_scale_size: image filtering and
        processing options, passed through to the dataset-reading helpers.
    :param dataset_types: dataset splits to load, e.g. ('train', 'test').
    :return: dict with keys 'x_<type>' and 'y_<type>' for each requested split, where each
        x_ entry is a list of images in torch tensor format and each y_ entry is the list
        of corresponding labels.
    '''
    # Download the dataset if it is not already prepared in the directory
    if min_image_size is not None and max_image_size is not None:
        assert min_image_size <= max_image_size
    if dataset_name == 'cifar10':
        if not os.path.exists(data_dir + 'cifar10_train.zip') or not os.path.exists(data_dir + 'cifar10_test.zip'):
            print('Downloading dataset ...')
            from examples.datasets.image_files.load_cifar10 import load_cifar10
            # Generates the train, val and test datasets in `data_dir`;
            # with validation_split=0, the val dataset is empty.
            load_cifar10(limit=None, validation_split=0,
                out_train_dataset_path=data_dir + 'cifar10_train.zip',
                out_val_dataset_path=data_dir + 'cifar10_val.zip',
                out_test_dataset_path=data_dir + 'cifar10_test.zip',
                out_meta_csv_path=data_dir + 'cifar10_meta.csv')
    elif dataset_name == 'fashion_mnist':
        if not os.path.exists(data_dir + 'fashion_mnist_train.zip') or not os.path.exists(data_dir + 'fashion_mnist_test.zip'):
            print('Downloading dataset ...')
            from examples.datasets.image_files.load_fashion_mnist import load_fashion_mnist
            # Generates the train, val and test datasets in `data_dir`;
            # with validation_split=0, the val dataset is empty.
            load_fashion_mnist(limit=None, validation_split=0,
                out_train_dataset_path=data_dir + 'fashion_mnist_train.zip',
                out_val_dataset_path=data_dir + 'fashion_mnist_val.zip',
                out_meta_csv_path=data_dir + 'fashion_mnist_meta.csv',
                out_test_dataset_path=data_dir + 'fashion_mnist_test.zip')

    elif dataset_name in ('xray', 'chest-xray-pneumonia'):
        if not os.path.exists(data_dir + 'chest-xray-pneumonia.zip'):
            print('Downloading dataset ...')
            import kaggle
            kaggle.api.authenticate()
            kaggle.api.dataset_download_files('paultimothymooney/chest-xray-pneumonia', path=data_dir, unzip=False)

    # Read the downloaded dataset zip files
    if dataset_name in ('xray', 'chest-xray-pneumonia'):
        datasets_loaded = processing_and_reading_downloaded_xray('chest-xray-pneumonia', 
                                                   data_dir, 
                                                   min_image_size=min_image_size, 
                                                   max_image_size=max_image_size, 
                                                   mode=mode, 
                                                   image_scale_size=image_scale_size)
    else:
        datasets_loaded = dict()
        for dataset_type in dataset_types:
            (x, y) = processing_and_reading_existing_datasets(dataset_name, 
                                                              data_dir=data_dir,  
                                                              min_image_size=min_image_size, 
                                                              max_image_size=max_image_size, 
                                                              mode=mode, 
                                                              image_scale_size=image_scale_size,
                                                              dataset_type=dataset_type)
            datasets_loaded.update({f'x_{dataset_type}': x, f'y_{dataset_type}': y})

    return datasets_loaded
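
For the non-Kaggle datasets, the returned dict is keyed by split, so the four arrays named in the docstring are recovered by key lookup. An illustrative call (the paths and sizes are placeholders for CIFAR-10's 32x32 images):

# Illustrative usage; sizes are placeholders for CIFAR-10's 32x32 images.
datasets = image_dataset_download_load_and_split(
    'cifar10',
    data_dir='data/',
    min_image_size=32,
    max_image_size=32,
    image_scale_size=32,
    dataset_types=('train', 'test'))

x_train, y_train = datasets['x_train'], datasets['y_train']
x_test, y_test = datasets['x_test'], datasets['y_test']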