コード例 #1
0
def test_cifar_resnet_distributed_block_momentum(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-b 3200``.

    The data directory comes from ``prepare_CIFAR10_data()``. The trailing
    arguments ``0.89, False, 5`` are forwarded unchanged; their meaning is
    defined by ``mpiexec_test`` (presumably expected error, exact-match flag
    and tolerance -- TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-e", "2", "-datadir", data_dir]
    cli_args += ["-b", "3200"]
    cli_args += ["-es", "512"]
    # The device index handed to the script is fixed at "0" in this version.
    cli_args += ["-device", "0"]
    mpiexec_test(device_id, script_under_test, cli_args, 0.89, False, 5)
コード例 #2
0
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    """Run ``resnet_cifar10`` with 'resnet20' on CIFAR-10 and finalize the communicator.

    Skipped when ``device_id`` is not a GPU device or when ``is_1bit_sgd`` is
    falsy. The returned test error is currently not asserted (see the note
    near the end of the body).
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_computation_network_trace_level,
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_computation_network_trace_level(1)
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not done for now.

    train_map, test_map, mean_file = (
        os.path.join(data_root, file_name)
        for file_name in ('train_map.txt', 'test_map.txt', 'CIFAR-10_mean.xml')
    )

    test_error = resnet_cifar10(train_map, test_map, mean_file, 'resnet20',
                                epoch_size=512, max_epochs=2)

    # The tolerance check on the error (expected_test_error = 0.282 compared
    # with np.allclose and atol=TOLERANCE_ABSOLUTE) is disabled because a
    # small epoch size gives a huge variance in accuracy. It is to be added
    # back once the convolution operator is deterministic.
    distributed.Communicator.finalize()
コード例 #3
0
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Prepare CIFAR-10 data and, unless on CPU, train 'resnet20' for evaluation.

    Args:
        test_device: ``'cpu'`` only prints a notice; any other value trains.
        output_dir: Created if it is not an existing directory; its absolute
            path is passed as ``model_dir`` to ``train_and_evaluate``.

    Returns:
        The path returned by ``prepare_test_data.prepare_CIFAR10_data()``.
        The working directory is changed to that path as a side effect.
    """
    output_dir = os.path.abspath(output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(base_path)

    if test_device == 'cpu':
        print('train cifar_resnet only on GPU device. Use pre-trained models.')
        return base_path

    print('training cifar_resnet on GPU device...')
    mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_source = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'train_map.txt'), mean_file, True)
    test_source = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'test_map.txt'), mean_file, False)
    TrainResNet_CIFAR10.train_and_evaluate(
        train_source, test_source, 'resnet20',
        epoch_size=512, max_epochs=1, profiler_dir=None, model_dir=output_dir)
    return base_path
def test_bn_inception_cifar(device_id):
    """Train and evaluate BN-Inception on CIFAR-10 and check the resulting error.

    Skipped when ``device_id`` is not a GPU device. The working directory is
    switched to the data directory for the duration of the run and is always
    restored afterwards, including when seeding or training raises.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    try:
        base_path = prepare_CIFAR10_data()
        # change dir to locate data.zip correctly
        os.chdir(base_path)

        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
        train_data = os.path.join(base_path, 'train_map.txt')
        test_data = os.path.join(base_path, 'test_map.txt')

        error = bn_inception_train_and_eval(train_data,
                                            test_data,
                                            mean_data,
                                            minibatch_size=16,
                                            epoch_size=500,
                                            max_epochs=8,
                                            restore=False,
                                            testing_parameters=(500, 16))
    finally:
        # Undo the chdir no matter where the block above failed.
        os.chdir(current_path)

    expected_error = 0.88
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
コード例 #5
0
def test_cifar_resnet_distributed_1bitsgd(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 1``.

    The data directory comes from ``prepare_CIFAR10_data()``. The trailing
    arguments ``0.86, False, 3`` are forwarded unchanged; their meaning is
    defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-e", "2", "-datadir", data_dir]
    cli_args += ["-q", "1"]
    cli_args += ["-es", "512"]
    # The device index handed to the script is fixed at "0" in this version.
    cli_args += ["-device", "0"]
    mpiexec_test(device_id, script_under_test, cli_args, 0.86, False, 3)
コード例 #6
0
def test_cifar_convnet_distributed(device_id):
    """Run the distributed script with TensorBoard logging and check the log dir.

    After ``mpiexec_test`` returns, the log directory must contain exactly one
    entry and its name must start with ``events.out.tfevents``.
    """
    # The log directory lives next to this test file; remove any stale copy
    # so the entry count checked below refers to this run only.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(test_dir,
                             'ConvNet_CIFAR10_DataAug_Distributed_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-tensorboard_logdir", tb_logdir]
    cli_args += ["-q", "32", "-r"]
    cli_args += ["-device", str(device_id)]

    # False since different workers may have different #cores
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.75, False, per_minibatch_tolerance=1e-2)

    # Every entry must be an event file, and there must be exactly one.
    log_entries = os.listdir(tb_logdir)
    for entry in log_entries:
        assert entry.startswith("events.out.tfevents")
    assert len(log_entries) == 1
コード例 #7
0
def test_cifar_convnet_error(device_id):
    """Run ``convnetlrn_cifar10_dataaug`` for one short epoch on a GPU device.

    Skipped when ``device_id`` is not a GPU device. The returned test error is
    stored in ``test_error`` but not asserted in the visible code.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_computation_network_trace_level,
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_computation_network_trace_level(1)
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not done for now.

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train = create_reader(
        os.path.join(data_root, 'train_map.txt'), mean_file, True)
    reader_test = create_reader(
        os.path.join(data_root, 'test_map.txt'), mean_file, False)

    test_error = convnetlrn_cifar10_dataaug(
        reader_train, reader_test, epoch_size=256, max_epochs=1)
コード例 #8
0
def test_bn_inception_cifar(device_id):
    """Train and evaluate BN-Inception on CIFAR-10 and check the resulting error.

    Skipped when ``device_id`` is not a GPU device. The working directory is
    switched to the data directory for the duration of the run and is always
    restored afterwards, including when seeding or training raises.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    try:
        base_path = prepare_CIFAR10_data()
        # change dir to locate data.zip correctly
        os.chdir(base_path)

        from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
        set_fixed_random_seed(1)
        force_deterministic_algorithms()

        mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
        train_data = os.path.join(base_path, 'train_map.txt')
        test_data = os.path.join(base_path, 'test_map.txt')

        error = bn_inception_train_and_eval(
            train_data, test_data, mean_data,
            minibatch_size=16, epoch_size=500, max_epochs=8,
            restore=False, testing_parameters=(500, 16))
    finally:
        # Undo the chdir no matter where the block above failed.
        os.chdir(current_path)

    expected_error = 0.88
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
コード例 #9
0
def test_cifar_convnet_error(device_id):
    """Train the CIFAR-10 convnet model for one short epoch on a GPU device.

    Skipped when ``device_id`` is not a GPU device. The value returned by
    ``train_model`` is stored in ``train_loss1`` but not asserted in the
    visible code.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train1 = create_reader(
        os.path.join(data_root, 'train_map.txt'), mean_file, False)
    reader_test1 = create_reader(
        os.path.join(data_root, 'test_map.txt'), mean_file, False)
    model1 = create_convnet_cifar10_model(num_classes=10)
    train_loss1 = train_model(
        reader_train1, reader_test1, model1, epoch_size=128, max_epochs=1)
def test_cifar_convnet_error(device_id):
    """Train the CIFAR-10 convnet model and compare loss and metric to references.

    Skipped when ``device_id`` is not a GPU device. The pair returned by
    ``train_model`` must match ``(2.2963, 0.9062)`` within
    ``TOLERANCE_ABSOLUTE``.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    train_map = os.path.join(data_root, 'train_map.txt')
    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train = create_reader(train_map, mean_file, False)

    model = create_convnet_cifar10_model(num_classes=10)
    input_shape = (num_channels, image_height, image_width)
    model.update_signature(input_shape)
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)

    train_loss, metric = train_model(
        reader_train, model, criterion, epoch_size=128, max_epochs=5)

    reference = (2.2963, 0.9062)
    observed = (train_loss, metric)
    assert np.allclose(observed, reference, atol=TOLERANCE_ABSOLUTE)
コード例 #11
0
def test_cifar_convnet_distributed_block_momentum(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-b 1600``.

    The trailing arguments ``0.78, False, 10`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-b", "1600", "-r"]
    cli_args += ["-device", str(device_id)]
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.78, False, 10)
コード例 #12
0
def test_cifar_resnet_distributed_block_momentum(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-b 3200``.

    The trailing arguments ``0.89, False, 5`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-e", "2", "-datadir", data_dir]
    cli_args += ["-b", "3200"]
    cli_args += ["-es", "512"]
    cli_args += ["-device", str(device_id)]
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.89, False, 5)
コード例 #13
0
def test_cifar_resnet_distributed_1bitsgd(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 1``.

    The trailing arguments ``0.86, False, 3`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-e", "2", "-datadir", data_dir]
    cli_args += ["-q", "1"]
    cli_args += ["-es", "512"]
    cli_args += ["-device", str(device_id)]
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.86, False, 3)
コード例 #14
0
def test_bn_inception_cifar_distributed(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 32``.

    The trailing arguments ``0.88, True`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "8"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-q", "32", "-e", "500", "-m", "16", "-r"]
    cli_args += ["-device", str(device_id)]
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.88, True)
コード例 #15
0
def test_cifar_convnet_distributed_1bitsgd(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 1``.

    ``0.75`` and ``False`` are forwarded positionally and
    ``per_minibatch_tolerance=1e-2`` by keyword; their meaning is defined by
    ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-q", "1", "-r"]
    cli_args += ["-device", str(device_id)]
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.75, False, per_minibatch_tolerance=1e-2)
コード例 #16
0
def test_cifar_convnet_distributed_block_momentum(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-b 1600``.

    The trailing arguments ``0.78, False, 10`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-b", "1600", "-r"]
    # The device index handed to the script is fixed at "0" in this version.
    cli_args += ["-device", "0"]
    mpiexec_test(device_id, script_under_test, cli_args, 0.78, False, 10)
def test_bn_inception_cifar_distributed(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 32``.

    The trailing arguments ``0.88, True`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "8"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-q", "32", "-e", "500", "-m", "16", "-r"]
    cli_args += ["-device", str(device_id)]
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.88, True)
コード例 #18
0
def test_cifar_convnet_distributed_1bitsgd(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 1``.

    The trailing arguments ``0.75, True`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-q", "1", "-r"]
    # The device index handed to the script is fixed at "0" in this version.
    cli_args += ["-device", "0"]
    mpiexec_test(device_id, script_under_test, cli_args, 0.75, True)
コード例 #19
0
def test_cifar_resnet_error(device_id):
    """Train 'resnet20' for one short epoch and verify the TensorBoard output.

    Skipped when ``device_id`` is not a GPU device. The test error itself is
    not asserted (see the note in the body); the check is that the log
    directory holds exactly one entry named ``events.out.tfevents*``.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_computation_network_trace_level,
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_computation_network_trace_level(1)
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not done for now.

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train = create_image_mb_source(
        os.path.join(data_root, 'train_map.txt'),
        mean_file,
        True,
        total_number_of_samples=1 * 50000)
    reader_test = create_image_mb_source(
        os.path.join(data_root, 'test_map.txt'),
        mean_file,
        False,
        total_number_of_samples=cntk.io.FULL_DATA_SWEEP)

    # The log directory lives next to this test file; remove any stale copy
    # so the entry count checked below refers to this run only.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(test_dir, 'TrainResNet_CIFAR10_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    test_error = train_and_evaluate(
        reader_train, reader_test, 'resnet20',
        epoch_size=512, max_epochs=1, tensorboard_logdir=tb_logdir)

    # The tolerance check on the error (expected_test_error = 0.282 compared
    # with np.allclose and atol=TOLERANCE_ABSOLUTE) is disabled because a
    # small epoch size gives a huge variance in accuracy. It is to be added
    # back once the convolution operator is deterministic.

    log_entries = os.listdir(tb_logdir)
    for entry in log_entries:
        assert entry.startswith("events.out.tfevents")
    assert len(log_entries) == 1
コード例 #20
0
def test_binary_convnet_error(device_id):
    """Train the binary convolution model, then reload and evaluate it on CPU.

    Skipped when ``device_id`` is not a GPU device. Both the training
    (loss, metric) pair and the evaluation (loss, metric) pair are compared
    with fixed reference values within ``TOLERANCE_ABSOLUTE``.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')

    # Training phase.
    reader_train = create_reader(
        os.path.join(data_root, 'train_map.txt'), mean_file, False)
    model = create_binary_convolution_model()
    z, train_criterion = get_z_and_criterion(model)
    train_loss, train_metric = train_model(
        reader_train, z, train_criterion, epoch_size=8192, max_epochs=5)

    train_reference = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, train_metric), train_reference,
                       atol=TOLERANCE_ABSOLUTE)

    # Save and load (as an illustration); the reloaded model lives on CPU.
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # Evaluation phase, using the clone with native binary convolutions.
    native_model = clone_with_native_binary_convolutions(model)
    _, eval_criterion = get_z_and_criterion(native_model)

    reader_test = create_reader(
        os.path.join(data_root, 'test_map.txt'), mean_file, False)
    test_loss, test_metric = evaluate(
        reader_test, eval_criterion,
        device=eval_device, minibatch_size=1, max_samples=200)

    test_reference = (0.0, 0.695)
    assert np.allclose((test_loss, test_metric), test_reference,
                       atol=TOLERANCE_ABSOLUTE)
コード例 #21
0
def unzip_data(output_dir):
    """Extract six PNG images from the CIFAR-10 ``data.zip`` into ``output_dir``.

    The members ``data/train/00000.png`` through ``data/train/00005.png`` are
    extracted. ``output_dir`` is created if it is not an existing directory,
    and the working directory is changed to the prepared data directory.
    """
    output_dir = os.path.abspath(output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(base_path)

    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    members = ['data/train/%05d.png' % index for index in range(6)]
    with zipfile.ZipFile(archive_path) as archive:
        for member in members:
            archive.extract(member, output_dir)
コード例 #22
0
def unzip_data(output_dir):
    """Extract six PNG images from the CIFAR-10 ``data.zip`` into ``output_dir``.

    The members ``data/train/00000.png`` through ``data/train/00005.png`` are
    extracted. ``output_dir`` is created if it is not an existing directory,
    and the working directory is changed to the prepared data directory.
    """
    output_dir = os.path.abspath(output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(base_path)

    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    with zipfile.ZipFile(archive_path) as archive:
        image_index = 0
        while image_index < 6:
            archive.extract('data/train/%05d.png' % image_index, output_dir)
            image_index += 1
コード例 #23
0
def test_cifar_convnet_error(device_id):
    """Run ``convnet_cifar10_dataaug`` for one short epoch on a GPU device.

    Skipped when ``device_id`` is not a GPU device. The returned test error is
    stored in ``test_error`` but not asserted in the visible code.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not done for now.

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train = create_reader(
        os.path.join(data_root, 'train_map.txt'), mean_file, True)
    reader_test = create_reader(
        os.path.join(data_root, 'test_map.txt'), mean_file, False)

    test_error = convnet_cifar10_dataaug(
        reader_train, reader_test, epoch_size=256, max_epochs=1)
コード例 #24
0
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Prepare CIFAR-10 data and, unless on CPU, train 'resnet20' for evaluation.

    Args:
        test_device: ``'cpu'`` only prints a notice; any other value trains.
        output_dir: Created if it is not an existing directory; its absolute
            path is passed as ``model_dir`` to ``train_and_evaluate``.

    Returns:
        The path returned by ``prepare_test_data.prepare_CIFAR10_data()``.
        The working directory is changed to that path as a side effect.
    """
    output_dir = os.path.abspath(output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(base_path)

    should_train = test_device != 'cpu'
    if should_train:
        print('training cifar_resnet on GPU device...')
        mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
        train_source = TrainResNet_CIFAR10.create_reader(
            os.path.join(base_path, 'train_map.txt'), mean_file, True)
        test_source = TrainResNet_CIFAR10.create_reader(
            os.path.join(base_path, 'test_map.txt'), mean_file, False)
        TrainResNet_CIFAR10.train_and_evaluate(
            train_source, test_source, 'resnet20',
            epoch_size=512, max_epochs=1,
            profiler_dir=None, model_dir=output_dir)
    else:
        print('train cifar_resnet only on GPU device. Use pre-trained models.')

    return base_path
def test_binary_convnet_error(device_id):
    """Train the binary convolution model, then reload and evaluate it on CPU.

    Skipped when the native binary convolution function is not registered or
    when ``device_id`` is not a GPU device. Both the training (loss, metric)
    pair and the evaluation (loss, metric) pair are compared with fixed
    reference values within ``TOLERANCE_ABSOLUTE``.
    """
    if not native_convolve_function_registered:
        # The skip message names the library and the example directory to
        # rebuild ('Extensibility' was previously misspelled).
        pytest.skip("Could not find {0} library. "
                    "Please check if HALIDE_PATH is configured properly "
                    "and try building {1} again"
                    .format('Cntk.BinaryConvolution-' + C.__version__.rstrip('+'),
                            'Extensibility\\BinaryConvolution'))

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    train_loss, metric = train_model(reader_train, z, criterion, epoch_size=8192, max_epochs=5)

    expected_loss_metric = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

    # save and load (as an illustration); the reloaded model lives on CPU
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # test, using the clone with native binary convolutions
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)

    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    test_loss, metric = evaluate(reader_test, criterion, device=eval_device, minibatch_size=1, max_samples=200)

    expected_loss_metric = (0.0, 0.695)
    assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
コード例 #26
0
def test_cifar_convnet_error(device_id):
    """Train the CIFAR-10 convnet model and compare loss and metric to references.

    Skipped when ``device_id`` is not a GPU device. The pair returned by
    ``train_model`` must match ``(2.2963, 0.9062)`` within
    ``TOLERANCE_ABSOLUTE``.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    train_map = os.path.join(data_root, 'train_map.txt')
    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train = create_reader(train_map, mean_file, False)

    model = create_convnet_cifar10_model(num_classes=10)
    input_shape = (num_channels, image_height, image_width)
    model.update_signature(input_shape)
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)

    train_loss, metric = train_model(
        reader_train, model, criterion, epoch_size=128, max_epochs=5)

    reference = (2.2963, 0.9062)
    observed = (train_loss, metric)
    assert np.allclose(observed, reference, atol=TOLERANCE_ABSOLUTE)
コード例 #27
0
def test_cifar_resnet_error(device_id):
    """Train 'resnet20' for one short epoch and verify the TensorBoard output.

    Skipped when ``device_id`` is not a GPU device. The test error itself is
    not asserted (see the note in the body); the check is that the log
    directory holds exactly one entry named ``events.out.tfevents*``.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_computation_network_trace_level,
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_computation_network_trace_level(1)
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not done for now.

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')
    reader_train = create_reader(
        os.path.join(data_root, 'train_map.txt'), mean_file, True)
    reader_test = create_reader(
        os.path.join(data_root, 'test_map.txt'), mean_file, False)

    # The log directory lives next to this test file; remove any stale copy
    # so the entry count checked below refers to this run only.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(test_dir, 'TrainResNet_CIFAR10_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    test_error = train_and_evaluate(
        reader_train, reader_test, 'resnet20',
        epoch_size=512, max_epochs=1, tensorboard_logdir=tb_logdir)

    # The tolerance check on the error (expected_test_error = 0.282 compared
    # with np.allclose and atol=TOLERANCE_ABSOLUTE) is disabled because a
    # small epoch size gives a huge variance in accuracy. It is to be added
    # back once the convolution operator is deterministic.

    log_entries = os.listdir(tb_logdir)
    for entry in log_entries:
        assert entry.startswith("events.out.tfevents")
    assert len(log_entries) == 1
コード例 #28
0
def test_cifar_convnet_distributed(device_id):
    """Run the distributed script with TensorBoard logging and check the log dir.

    After ``mpiexec_test`` returns, the log directory must contain exactly one
    entry and its name must start with ``events.out.tfevents``.
    """
    # The log directory lives next to this test file; remove any stale copy
    # so the entry count checked below refers to this run only.
    test_dir = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(test_dir,
                             'ConvNet_CIFAR10_DataAug_Distributed_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-tensorboard_logdir", tb_logdir]
    cli_args += ["-q", "32", "-r"]
    cli_args += ["-device", str(device_id)]

    # False since different workers may have different #cores
    mpiexec_test(device_id, script_under_test, mpiexec_params, cli_args,
                 0.75, False, per_minibatch_tolerance=1e-2)

    # Every entry must be an event file, and there must be exactly one.
    log_entries = os.listdir(tb_logdir)
    for entry in log_entries:
        assert entry.startswith("events.out.tfevents")
    assert len(log_entries) == 1
コード例 #29
0
def run_cifar_convnet_distributed():
    """Run ``convnet_cifar10_dataaug`` on the prepared CIFAR-10 data.

    Changes the working directory to the data directory, raises the trace
    level to 1, sets the fixed random seed to 1 and passes 32 as the
    quantization-bits argument.

    Returns:
        Whatever ``convnet_cifar10_dataaug`` returns.
    """
    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_computation_network_trace_level,
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_computation_network_trace_level(1)
    # BUGBUG: the fixed seed has no effect at present.
    # TODO: remove debugging facilities once this all works.
    set_fixed_random_seed(1)
    # TODO: also call force_deterministic_algorithms(); it leads to slightly
    # different results, so it is not done for now.

    train_map, mean_file, test_map = (
        os.path.join(data_root, file_name)
        for file_name in ('train_map.txt', 'CIFAR-10_mean.xml', 'test_map.txt')
    )

    quantization_bits = 32
    return convnet_cifar10_dataaug(train_map, test_map, mean_file,
                                   quantization_bits,
                                   epoch_size=512, max_epochs=2)
コード例 #30
0
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Extract sample images and, unless on CPU, train 'resnet20' for evaluation.

    Args:
        test_device: ``'cpu'`` only prints a notice; any other value trains.
        output_dir: Created if it is not an existing directory. Receives the
            extracted images, and its absolute path is passed as
            ``model_dir`` to ``train_and_evaluate``.

    Returns:
        The path returned by ``prepare_test_data.prepare_CIFAR10_data()``.
        The working directory is changed to that path as a side effect.
    """
    output_dir = os.path.abspath(output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(base_path)

    # Extract data/train/00000.png .. 00005.png as images for evaluation.
    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    members = ['data/train/%05d.png' % index for index in range(6)]
    with zipfile.ZipFile(archive_path) as archive:
        for member in members:
            archive.extract(member, output_dir)

    if test_device == 'cpu':
        print('train cifar_resnet only on GPU device. Use pre-trained models.')
        return base_path

    print('training cifar_resnet on GPU device...')
    mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_source = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'train_map.txt'), mean_file, True)
    test_source = TrainResNet_CIFAR10.create_reader(
        os.path.join(base_path, 'test_map.txt'), mean_file, False)
    TrainResNet_CIFAR10.train_and_evaluate(
        train_source, test_source, 'resnet20',
        epoch_size=512, max_epochs=1, profiler_dir=None, model_dir=output_dir)
    return base_path
コード例 #31
0
def test_binary_convnet_error(device_id):
    """Train the binary convolution model, then reload and evaluate it on CPU.

    Skipped when ``device_id`` is not a GPU device. Both the training
    (loss, metric) pair and the evaluation (loss, metric) pair are compared
    with fixed reference values within ``TOLERANCE_ABSOLUTE``.
    """
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    data_root = prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(data_root)

    from _cntk_py import (
        set_fixed_random_seed,
        force_deterministic_algorithms,
    )
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_file = os.path.join(data_root, 'CIFAR-10_mean.xml')

    # Training phase.
    reader_train = create_reader(
        os.path.join(data_root, 'train_map.txt'), mean_file, False)
    model = create_binary_convolution_model()
    z, train_criterion = get_z_and_criterion(model)
    train_loss, train_metric = train_model(
        reader_train, z, train_criterion, epoch_size=8192, max_epochs=5)

    train_reference = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, train_metric), train_reference,
                       atol=TOLERANCE_ABSOLUTE)

    # Save and load (as an illustration); the reloaded model lives on CPU.
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # Evaluation phase, using the clone with native binary convolutions.
    native_model = clone_with_native_binary_convolutions(model)
    _, eval_criterion = get_z_and_criterion(native_model)

    reader_test = create_reader(
        os.path.join(data_root, 'test_map.txt'), mean_file, False)
    test_loss, test_metric = evaluate(
        reader_test, eval_criterion,
        device=eval_device, minibatch_size=1, max_samples=200)

    test_reference = (0.0, 0.695)
    assert np.allclose((test_loss, test_metric), test_reference,
                       atol=TOLERANCE_ABSOLUTE)
コード例 #32
0
def train_cifar_resnet_for_eval(test_device, output_dir):
    """Extract sample images and, unless on CPU, train 'resnet20' for evaluation.

    Args:
        test_device: ``'cpu'`` only prints a notice; any other value trains.
        output_dir: Created if it is not an existing directory. Receives the
            extracted images, and its absolute path is passed as
            ``model_dir`` to ``train_and_evaluate``.

    Returns:
        The path returned by ``prepare_test_data.prepare_CIFAR10_data()``.
        The working directory is changed to that path as a side effect.
    """
    output_dir = os.path.abspath(output_dir)
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    base_path = prepare_test_data.prepare_CIFAR10_data()
    # Work from the data directory so that data.zip is located correctly.
    os.chdir(base_path)

    # Extract data/train/00000.png .. 00005.png as images for evaluation.
    archive_path = os.path.join(base_path, 'cifar-10-batches-py', 'data.zip')
    with zipfile.ZipFile(archive_path) as archive:
        for image_index in range(6):
            member = 'data/train/%05d.png' % image_index
            archive.extract(member, output_dir)

    should_train = test_device != 'cpu'
    if should_train:
        print('training cifar_resnet on GPU device...')
        mean_file = os.path.join(base_path, 'CIFAR-10_mean.xml')
        train_source = TrainResNet_CIFAR10.create_reader(
            os.path.join(base_path, 'train_map.txt'), mean_file, True)
        test_source = TrainResNet_CIFAR10.create_reader(
            os.path.join(base_path, 'test_map.txt'), mean_file, False)
        TrainResNet_CIFAR10.train_and_evaluate(
            train_source, test_source, 'resnet20',
            epoch_size=512, max_epochs=1,
            profiler_dir=None, model_dir=output_dir)
    else:
        print('train cifar_resnet only on GPU device. Use pre-trained models.')

    return base_path
コード例 #33
0
def test_cifar_convnet_distributed_1bitsgd(device_id):
    """Launch the script under test through ``mpiexec_test`` with ``-q 1``.

    The trailing arguments ``0.75, True`` are forwarded unchanged; their
    meaning is defined by ``mpiexec_test`` (TODO confirm).
    """
    data_dir = prepare_CIFAR10_data()
    cli_args = ["-n", "2", "-m", "64", "-e", "3200"]
    cli_args += ["-datadir", data_dir]
    cli_args += ["-q", "1", "-r"]
    # The device index handed to the script is fixed at "0" in this version.
    cli_args += ["-device", "0"]
    mpiexec_test(device_id, script_under_test, cli_args, 0.75, True)
コード例 #34
0
import numpy as np
import os
import sys
import pytest
import subprocess

# Directory containing this test file.
abs_path = os.path.dirname(os.path.abspath(__file__))
# ResNet example directory, addressed relative to this test file.
example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples",
                           "Image", "Classification", "ResNet", "Python")
# Both directories are added to sys.path before the imports below;
# presumably the helper modules imported next live there -- TODO confirm.
sys.path.append(example_dir)
sys.path.append(abs_path)

from distributed_common import mpiexec_test
from prepare_test_data import prepare_CIFAR10_data

# NOTE: runs at import time, so importing this module prepares the data and
# changes the process working directory.
base_path = prepare_CIFAR10_data()
# change dir to locate data.zip correctly
os.chdir(base_path)

# Path of the distributed training script that the tests launch.
script_under_test = os.path.join(example_dir,
                                 "TrainResNet_CIFAR10_Distributed.py")

# Presumably arguments for the mpiexec launcher itself -- confirm against
# distributed_common.mpiexec_test.
mpiexec_params = ["-n", "2"]


def test_cifar_resnet_distributed(device_id):
    """Build the command-line arguments for the distributed ResNet run.

    NOTE(review): in the visible code ``params`` is assembled but never used;
    the rest of this test (presumably an ``mpiexec_test`` call) appears to be
    missing -- confirm against the original file.
    """
    # The data directory is the module-level base_path; the device index is
    # taken from the device_id argument.
    params = [
        "-e", "2", "-datadir", base_path, "-q", "32", "-es", "512", "-r",
        "-device",
        str(device_id)
    ]
コード例 #35
0
import signal
import shutil
import subprocess
import re
import pytest

# Directory containing this test file; added to sys.path before the imports
# below (presumably so the helper modules can be found -- TODO confirm).
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
# ConvNet example directory, addressed relative to this test file.
example_dir = os.path.join(abs_path, "..", "..", "..", "..", "Examples", "Image", "Classification", "ConvNet", "Python")
sys.path.append(example_dir)
# Path of the distributed training script that the tests launch.
script_under_test = os.path.join(example_dir, "ConvNet_CIFAR10_DataAug_Distributed.py")

from distributed_common import mpiexec_test, mpiexec_execute
from prepare_test_data import prepare_CIFAR10_data

# NOTE: runs at import time, so importing this module prepares the data and
# changes the process working directory.
base_path = prepare_CIFAR10_data()
# change dir to locate data.zip correctly
os.chdir(base_path)

# Presumably arguments for the mpiexec launcher itself -- confirm against
# distributed_common.mpiexec_test.
mpiexec_params = [ "-n", "2"]


def test_cifar_convnet_distributed(device_id):
    # Create a path to TensorBoard log directory and make sure it does not exist.
    abs_path = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(abs_path, 'ConvNet_CIFAR10_DataAug_Distributed_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    params = [ "-n", "2",
               "-m", "64",