def test_alexnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_ImageNet_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    # for test purpose we train and test on same data
    train_data = os.path.join(base_path, 'val1024_map.txt')
    test_data = os.path.join(base_path, 'val1024_map.txt')

    test_error = alexnet_train_and_eval(train_data,
                                        test_data,
                                        num_quantization_bits=32,
                                        minibatch_size=16,
                                        epoch_size=64,
                                        max_epochs=2)
Example #2
0
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        #      and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test  = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    test_error = convnet_cifar10_dataaug(reader_train, reader_test, max_epochs=1)
    expected_test_error = 0.617

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example #3
0
def test_word_rnn(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    # Just run and verify it does not crash
    # Setting global parameters
    W.use_sampled_softmax = True
    W.softmax_sample_size = 3
    W.use_sparse = True
    W.hidden_dim = 20
    W.num_layers = 2
    W.num_epochs = 1
    W.sequence_length = 3
    W.sequences_per_batch = 2
    W.alpha = 0.75
    W.learning_rate = 0.02
    W.momentum_as_time_constant = 5
    W.clipping_threshold_per_sample = 5.0
    W.segment_sepparator = '<eos>'
    W.num_samples_between_progress_report = 2

    # Get path to data files.
    dir = os.path.dirname(os.path.abspath(W.__file__))
    W.token_to_id_path = os.path.join(dir, 'test/token2id.txt')
    W.validation_file_path = os.path.join(dir, 'test/text.txt')
    W.train_file_path = os.path.join(dir, 'test/text.txt')
    W.token_frequencies_file_path = os.path.join(dir, 'test/freq.txt')

    W.train_lm()
Example #4
0
def test_ucf11_conv3d_error(device_id):
    # Skip for now.
    if True: #cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Video/DataSets/UCF11".split("/"))
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                *"../../../../Examples/Video/DataSets/UCF11".split("/"))

    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)

    # For performance reason, we will use test data for both training and testing.
    num_output_classes = 11
    # train_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, True)
    # test_reader  = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, False)

    test_error = 0.8437 #conv3d_ucf11(train_reader, test_reader, max_epochs=1)
    expected_test_error = 0.8437

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
def disabled_fix_data_set_zip___test_feature_extraction(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU') # due to batch normalization in ResNet_18
    set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        print("Reading data and model from %s" % extPath)
        model_file = os.path.join(extPath, *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))
        map_file = os.path.join(extPath, *"Image/CIFAR/v0/cifar-10-batches-py/test_map.txt".split("/"))
    else:
        model_file = os.path.join(base_path, *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))
        map_file = os.path.join(base_path, *"../../../../Examples/Image/DataSets/CIFAR-10/test_map.txt".split("/"))

    os.chdir(os.path.join(os.path.dirname(map_file), '..'))

    minibatch_source = create_mb_source(224, 224, 3, map_file)
    node_name = "z.x"
    output_file = os.path.join(base_path, "layerOutput.txt")
    eval_and_write(model_file, node_name, output_file, minibatch_source, num_objects=2)

    expected_output_file = os.path.join(base_path, "feature_extraction_expected_output.txt")
    output = np.fromfile(output_file)
    expected_output = np.fromfile(expected_output_file)

    print(output.shape)
    print(expected_output.shape)
    assert np.allclose(output, expected_output, atol=TOLERANCE_ABSOLUTE)
Example #6
0
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                 True)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False)

    test_error = convnetlrn_cifar10_dataaug(reader_train,
                                            reader_test,
                                            epoch_size=256,
                                            max_epochs=1)
Example #7
0
def test_ucf11_conv3d_error(device_id):
    # Skip for now.
    if True:  #cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Video/DataSets/UCF11".split("/"))
    except KeyError:
        base_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            *"../../../../Examples/Video/DataSets/UCF11".split("/"))

    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)

    # For performance reason, we will use test data for both training and testing.
    num_output_classes = 11
    # train_reader = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, True)
    # test_reader  = VideoReader(os.path.join(base_path, 'test_map.csv'), num_output_classes, False)

    test_error = 0.8437  #conv3d_ucf11(train_reader, test_reader, max_epochs=1)
    expected_test_error = 0.8437

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example #8
0
def test_feature_extraction(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU') # due to batch normalization in ResNet_18
    set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        print("Reading data and model from %s" % extPath)
        model_file = os.path.join(extPath, *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))
        map_file = os.path.join(extPath, *"Image/CIFAR/v0/cifar-10-batches-py/test_map.txt".split("/"))
    else:
        model_file = os.path.join(base_path, *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))
        map_file = os.path.join(base_path, *"../../../../Examples/Image/DataSets/CIFAR-10/test_map.txt".split("/"))

    os.chdir(os.path.join(os.path.dirname(map_file), '..'))

    minibatch_source = create_mb_source(224, 224, 3, map_file)
    node_name = "z.x"
    output_file = os.path.join(base_path, "layerOutput.txt")
    eval_and_write(model_file, node_name, output_file, minibatch_source, num_objects=2)

    expected_output_file = os.path.join(base_path, "feature_extraction_expected_output.txt")
    output = np.fromfile(output_file)
    expected_output = np.fromfile(expected_output_file)

    print(output.shape)
    print(expected_output.shape)
    assert np.allclose(output, expected_output, atol=TOLERANCE_ABSOLUTE)
Example #9
0
def test_ffnet_error(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    avg_error = ffnet()
    expected_avg_error = 0.04
    assert np.allclose(avg_error, expected_avg_error, atol=TOLERANCE_ABSOLUTE)
Example #10
0
def test_ffnet_error(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    avg_error = ffnet(debug_output=False)
    expected_avg_error = 0.12
    assert np.allclose(avg_error, expected_avg_error, atol=TOLERANCE_ABSOLUTE)
Example #11
0
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        #      and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test  = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1)
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_data=os.path.join(base_path, 'train_map.txt')
    test_data=os.path.join(base_path, 'test_map.txt')
    mean_data=os.path.join(base_path, 'CIFAR-10_mean.xml')

    test_error = resnet_cifar10(train_data, test_data, mean_data, 'resnet20', epoch_size=512, max_epochs=2)

# We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
# tolerance back once convolution operator is determinsitic. 

#    expected_test_error = 0.282

#    assert np.allclose(test_error, expected_test_error,
#                       atol=TOLERANCE_ABSOLUTE)
    distributed.Communicator.finalize()
Example #13
0
def test_ffnet_error(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    avg_error = ffnet()
    expected_avg_error = 0.04
    assert np.allclose(avg_error, expected_avg_error, atol=TOLERANCE_ABSOLUTE)
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1) 
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    test_error = train_and_evaluate(base_path, total_epochs=5)
    expected_test_error = 0.5

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    test_error = train_and_evaluate(base_path, total_epochs=5)
    expected_test_error = 0.5

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example #16
0
def test_char_rnn(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    # Just run and verify it does not crash
    output = train_and_eval_char_rnn(1, 200)
    print(output)
Example #17
0
def test_char_rnn(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    # Just run and verify it does not crash
    output = train_and_eval_char_rnn(1, 200)
    print(output)
Example #18
0
def test_sequence_to_sequence(device_id):

    # import code after setting the device, otherwise some part of the code picks up "default device"
    # which causes an inconsistency if there is already another job using GPU #0
    from Sequence2Sequence import create_reader, DATA_DIR, MODEL_DIR, TRAINING_DATA, VALIDATION_DATA, TESTING_DATA, \
                                  VOCAB_FILE, get_vocab, create_model, model_path_stem, train, evaluate_metric
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    # hook up data (train_reader gets False randomization to get consistent error)
    train_reader = create_reader(os.path.join(DATA_DIR, TRAINING_DATA), False)
    valid_reader = create_reader(os.path.join(DATA_DIR, VALIDATION_DATA), True)
    test_reader  = create_reader(os.path.join(DATA_DIR, TESTING_DATA), False)
    vocab, i2w, _ = get_vocab(os.path.join(DATA_DIR, VOCAB_FILE))

    # create model
    model = create_model()

    # train (with small numbers to finish within a reasonable amount of time)
    train(train_reader, valid_reader, vocab, i2w, model, max_epochs=1, epoch_size=5000)

    # now test the model and print out test error (for automated test)
    model_filename = os.path.join(MODEL_DIR, model_path_stem + ".cmf.0")
    model = load_model(model_filename)
    error = evaluate_metric(test_reader, model, 10)

    print(error)

    #expected_error =  0.9943119920022192 # when run separately
    expected_error =  0.9912881900980582 # when run inside the harness--random-initialization?
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #19
0
    def __init__(self,
                 device_id=None,
                 unit_gain=False,
                 n_workers=1,
                 visualizer=None):
        """
        Abstract constructor of CNTK model.
        This constructor wraps CNTK intialization and tuning
        :param device_id: Use None if you want CNTK to use the best available device, -1 for CPU, >= 0 for GPU
        :param n_workers: Number of concurrent workers for distributed training. Keep set to 1 for non distributed mode
        :param visualizer: Optional visualizer allowing model to save summary data
        """
        assert n_workers >= 1, 'n_workers should be at least 1 (not distributed) or > 1 if distributed'

        Visualizable.__init__(self, visualizer)

        self._model = None
        self._learner = None
        self._loss = None
        self._distributed = n_workers > 1

        if isinstance(device_id, int):
            set_default_device(cpu() if device_id == -1 else gpu(device_id))

        set_default_unit_gain_value(unit_gain)
Example #20
0
def test_word_rnn(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    # Just run and verify it does not crash
    # Setting global parameters
    W.use_sampled_softmax = True
    W.softmax_sample_size = 3
    W.use_sparse = True
    W.hidden_dim = 20
    W.num_layers = 2
    W.num_epochs = 1
    W.sequence_length = 3
    W.sequences_per_batch = 2
    W.alpha = 0.75
    W.learning_rate = 0.02
    W.momentum_as_time_constant = 5
    W.clipping_threshold_per_sample = 5.0
    W.segment_sepparator = '<eos>'
    W.num_samples_between_progress_report = 2

    # Get path to data files.
    dir =  os.path.dirname( os.path.abspath(W.__file__))
    W.token_to_id_path            = os.path.join(dir, 'test/token2id.txt')
    W.validation_file_path        = os.path.join(dir, 'test/text.txt')
    W.train_file_path             = os.path.join(dir, 'test/text.txt')
    W.token_frequencies_file_path = os.path.join(dir, 'test/freq.txt')

    W.train_lm()
Example #21
0
def test_simple_mnist_error(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    test_rmse = autoEncoder.deconv_mnist(max_epochs=1)
    expected_test_rmse = 0.288

    assert np.allclose(test_rmse, expected_test_rmse, atol=TOLERANCE_ABSOLUTE)
Example #22
0
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator(False, True)

    expected_error = 0.827699
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()

    expected_error = 0.8596881547969316
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = train_sequence_classifier()

    expected_avg = [0.55, 1.53099]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
def test_sequence_to_sequence(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator(False, True)

    expected_error =  0.827699
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #26
0
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()

    expected_error = 0.8596881547969316
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example #27
0
def test_simple_mnist_error(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    test_error = simple_mnist()
    expected_test_error = 0.09

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
def test_reasonet(device_id):
    print("Device Id: {0}".format(device_id))
    if device_id < 0:
        pytest.skip('test only runs on GPU')

    if sys.version_info[0] < 3:
        pytest.skip('test only runs on Python 3.x')

    import ReasoNet.reasonet as rsn
    device.set_default_device(cntk_device(device_id))
    data_path = os.path.join(module_path, "data/test.txt")
    eval_path = os.path.join(module_path, "data/test.txt")
    vocab_dim = 101100
    entity_dim = 101
    epoch_size = 1159400
    eval_size = 1159400
    hidden_dim = 300
    max_rl_iter = 5
    max_epochs = 1
    embedding_dim = 100
    params = rsn.model_params(vocab_dim=vocab_dim,
                              entity_dim=entity_dim,
                              hidden_dim=hidden_dim,
                              embedding_dim=embedding_dim,
                              embedding_init=None,
                              dropout_rate=0.2)

    train_data = rsn.create_reader(data_path,
                                   vocab_dim,
                                   entity_dim,
                                   True,
                                   rand_size=epoch_size)
    eval_data = rsn.create_reader(
        eval_path, vocab_dim, entity_dim, False,
        rand_size=eval_size) if eval_path is not None else None
    embedding_init = None

    #model = rsn.create_model(vocab_dim, entity_dim, hidden_dim, embedding_init=embedding_init, embedding_dim=embedding_dim, max_rl_iter=max_rl_iter, dropout_rate=0.2)
    model = rsn.create_model(params)
    learner = rsn.create_adam_learner(model.parameters)
    (train_loss, train_acc,
     eval_acc) = rsn.train(model,
                           params,
                           learner,
                           train_data,
                           max_epochs=max_epochs,
                           epoch_size=epoch_size,
                           save_model_flag=False,
                           model_name=os.path.basename(data_path),
                           eval_data=eval_data,
                           eval_size=eval_size,
                           check_point_freq=1,
                           minibatch_size=20000)
    assert abs(train_loss - 0.0668) < 1e-2
    assert abs(train_acc - 0.2179) < 1e-2
    assert abs(eval_acc - 0.282) < 1e-2
Example #29
0
def test_simple_mnist_error(device_id):
    from cntk.utils import cntk_device
    set_default_device(cntk_device(device_id))

    test_error = simple_mnist()
    expected_test_error = 0.09

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example #30
0
def test_seq_classification_error(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = train_sequence_classifier()

    expected_avg = [0.51, 1.28]
    assert np.allclose([evaluation_avg, loss_avg],
                       expected_avg,
                       atol=TOLERANCE_ABSOLUTE)
Example #31
0
def test_reasonet(device_id, is_1bit_sgd):
    print("Device Id: {0}".format(device_id))
    if device_id < 0:
        pytest.skip('test only runs on GPU')

    if is_1bit_sgd != 0:
        pytest.skip('test doesn\'t support 1bit sgd')

    import ReasoNet.reasonet as rsn
    device.set_default_device(cntk_device(device_id))
    data_path = os.path.join(module_path, "Data/fast_test.txt")
    eval_path = os.path.join(module_path, "Data/fast_test.txt")
    vocab_dim = 101100
    entity_dim = 101
    epoch_size = 1159400
    eval_size = 1159400
    hidden_dim = 256
    max_rl_iter = 5
    max_epochs = 1
    embedding_dim = 300
    att_dim = 384
    params = rsn.model_params(vocab_dim=vocab_dim,
                              entity_dim=entity_dim,
                              hidden_dim=hidden_dim,
                              embedding_dim=embedding_dim,
                              embedding_init=None,
                              attention_dim=att_dim,
                              dropout_rate=0.2)

    train_data = rsn.create_reader(data_path, vocab_dim, entity_dim, True)
    eval_data = rsn.create_reader(eval_path, vocab_dim, entity_dim,
                                  False) if eval_path is not None else None
    embedding_init = None

    model = rsn.create_model(params)
    learner = rsn.create_adam_learner(model.parameters)
    (train_loss, train_acc,
     eval_acc) = rsn.train(model,
                           params,
                           learner,
                           train_data,
                           max_epochs=max_epochs,
                           epoch_size=epoch_size,
                           save_model_flag=False,
                           model_name=os.path.basename(data_path),
                           eval_data=eval_data,
                           eval_size=eval_size,
                           check_point_freq=1,
                           minibatch_size=5000)
    assert abs(train_loss - 0.08067) < 1e-2
    assert abs(train_acc - 0.21635) < 1e-2
    if sys.version_info >= (3, ):
        assert abs(eval_acc - 0.304) < 1e-2
    else:
        assert abs(eval_acc - 0.312) < 1e-2
Example #32
0
def test_initializer_init(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    from cntk import cntk_py
    cntk_py.always_allow_setting_default_device()
    from cntk.device import set_default_device
    set_default_device(cntk_device(device_id))

    _check(uniform(scale=1), 'uniform')
    _check(normal(scale=1, output_rank=1, filter_rank=2), 'normal')
    _check(xavier(scale=10, output_rank=1, filter_rank=2), 'xavier')
    _check(glorot_uniform(scale=10, output_rank=1, filter_rank=2), 'glorot_uniform')
    _check(glorot_normal(scale=10, output_rank=1, filter_rank=2), 'glorot_normal')
    _check(he_uniform(scale=10, output_rank=1, filter_rank=2), 'he_uniform')
    _check(he_normal(scale=10, output_rank=1, filter_rank=2), 'he_normal')
Example #33
0
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                 True)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False)

    # Create a path to TensorBoard log directory and make sure it does not exist.
    abs_path = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    test_error = train_and_evaluate(reader_train,
                                    reader_test,
                                    'resnet20',
                                    epoch_size=512,
                                    max_epochs=1,
                                    tensorboard_logdir=tb_logdir)

    # We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
    # tolerance back once convolution operator is determinsitic.

    #    expected_test_error = 0.282

    #    assert np.allclose(test_error, expected_test_error,
    #                       atol=TOLERANCE_ABSOLUTE)

    files = 0
    for file in os.listdir(tb_logdir):
        assert file.startswith("events.out.tfevents")
        files += 1
    assert files == 1
Example #34
0
def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import cntk_py
    from cntk.device import set_default_device
    cntk_py.always_allow_setting_default_device()
    set_default_device(cntk_device(device_id))

    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10), 'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10), 'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')
Example #35
0
def test_cifar_convnet_error(device_id):
    if platform.system() == 'Windows':
        pytest.skip('test skipped on Windows')

    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
        # N.B. CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY has {train,test}_map.txt
        #      and CIFAR-10_mean.xml in the base_path.
    except KeyError:
        base_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                 True, 0)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False)

    distributed_after_samples = 0
    num_quantization_bits = 32
    distributed_trainer = distributed.data_parallel_distributed_trainer(
        num_quantization_bits=num_quantization_bits,
        distributed_after=distributed_after_samples)

    test_error = convnet_cifar10_dataaug(reader_train,
                                         reader_test,
                                         distributed_trainer,
                                         max_epochs=1)
    expected_test_error = 0.617

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
def train_and_evaluate(data_path, total_epochs, gpu_count=1):
    # Create distributed communicator for 1-bit SGD for better scaling to multiple GPUs
    # If you'd like to avoid quantization loss, use simple one instead
    quantization_bit = 1

    if (quantization_bit == 32):
        communicator = distributed.mpi_communicator()
    else:
        communicator = distributed.quantized_mpi_communicator(quantization_bit)

    workers = communicator.workers()
    current_worker = communicator.current_worker()
    print("List all distributed workers")
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            print("* {} {}".format(wk.global_rank, wk.host_id))
        else:
            print("  {} {}".format(wk.global_rank, wk.host_id))

    if gpu_count == 1 and len(workers) > 1:
        print("Warning: running distributed training on 1-GPU will be slow")
        device.set_default_device(gpu(0))

    print("Training on device type:{} id:{}".format(
        'gpu' if device.default().type() else 'cpu',
        device.default().id()))

    start_model = "start_model.bin"
    num_start_epochs = 1
    num_parallel_epochs = total_epochs - num_start_epochs

    # training the start model only in one worker
    if communicator.current_worker().global_rank == 0:
        cifar_resnet_distributed(data_path,
                                 save_model_filename=start_model,
                                 communicator=None,
                                 run_test=False,
                                 num_epochs=num_start_epochs)

    communicator.barrier()

    # train in parallel
    error = cifar_resnet_distributed(data_path,
                                     load_model_filename=start_model,
                                     communicator=communicator,
                                     run_test=True,
                                     num_epochs=num_parallel_epochs)

    distributed.Communicator.finalize()
    return error
Example #37
0
def test_eval_one_hot_seq(one_hot_batch, device_id):
    dim = 10
    multiplier = 2

    from cntk.device import cpu, gpu, set_default_device
    set_default_device(gpu(0))
    for var_is_sparse in [True, False]: 
        in1 = input_variable(shape=(dim,), is_sparse=var_is_sparse)
        # Convert CNTK node value to dense so that we can compare it later
        z = times(in1, np.eye(dim)*multiplier)
        # Convert expectation to dense
        expected = [np.eye(dim)[seq]*multiplier for seq in one_hot_batch]
        batch = one_hot(one_hot_batch, num_classes=dim, device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a,b) for a,b in zip(result, expected)])
def test_transfer_learning(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU') # due to batch normalization in ResNet_18
    set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        print("Reading data and model from %s" % extPath)
        model_file = os.path.join(extPath, *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))
        map_file = os.path.join(extPath, *"Image/CIFAR/v0/cifar-10-batches-py/test_map.txt".split("/"))
    else:
        model_file = os.path.join(base_path, *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))
        map_file = os.path.join(base_path, *"../../../../Examples/Image/DataSets/CIFAR-10/test_map.txt".split("/"))

    os.chdir(os.path.join(os.path.dirname(map_file), '..'))

    feature_node_name = "features"
    last_hidden_node_name = "z.x"
    image_width = 224
    image_height = 224
    num_channels = 3
    num_classes = 10

    num_epochs = 10
    num_train_images = 10
    num_test_images = 2

    node_outputs = get_node_outputs(load_model(model_file))
    assert len(node_outputs) == 83

    output_file = os.path.join(base_path, "tl_output.txt")
    trained_model = train_model(model_file, feature_node_name, last_hidden_node_name,
                                image_width, image_height, num_channels, num_classes, map_file,
                                num_epochs=num_epochs, max_images=num_train_images, freeze=True)

    # since we do not use a reader for evaluation we need unzipped data
    grocery_path = prepare_Grocery_data()
    eval_map_file = os.path.join(grocery_path, "test.txt")
    os.chdir(grocery_path)
    eval_test_images(trained_model, output_file, eval_map_file, image_width, image_height,
                     max_images=num_test_images, column_offset=1)

    expected_output_file = os.path.join(base_path, "tl_expected_output.txt")
    output = np.fromfile(output_file)
    expected_output = np.fromfile(expected_output_file)
    assert np.allclose(output, expected_output, atol=TOLERANCE_ABSOLUTE)
Example #39
0
def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import cntk_py
    from cntk.device import set_default_device
    cntk_py.always_allow_setting_default_device()
    set_default_device(cntk_device(device_id))

    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10),
           'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10),
           'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')
Example #40
0
def test_eval_one_hot_seq(one_hot_batch, device_id):
    dim = 10
    multiplier = 2

    from cntk.device import cpu, gpu, set_default_device
    set_default_device(gpu(0))
    for var_is_sparse in [True, False]:
        in1 = input_variable(shape=(dim, ), is_sparse=var_is_sparse)
        # Convert CNTK node value to dense so that we can compare it later
        z = times(in1, np.eye(dim) * multiplier)
        # Convert expectation to dense
        expected = [np.eye(dim)[seq] * multiplier for seq in one_hot_batch]
        batch = one_hot(one_hot_batch,
                        num_classes=dim,
                        device=cntk_device(device_id))
        result = z.eval({in1: batch}, device=cntk_device(device_id))
        assert np.all([np.allclose(a, b) for a, b in zip(result, expected)])
Example #41
0
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    distributed_learner_factory = lambda learner: distributed.data_parallel_distributed_learner(
        learner=learner, num_quantization_bits=32, distributed_after=0)

    reader_train_factory = lambda data_size: create_reader(
        os.path.join(base_path, 'train_map.txt'),
        os.path.join(base_path, 'CIFAR-10_mean.xml'), True, data_size)
    test_reader = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'),
                                False, FULL_DATA_SWEEP)

    test_error = train_and_evaluate(reader_train_factory, test_reader,
                                    'resnet20', 5, distributed_learner_factory)

    expected_test_error = 0.282

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
    distributed.Communicator.finalize()
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'test_map.txt')
    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')

    test_error = resnet_cifar10(train_data,
                                test_data,
                                mean_data,
                                'resnet20',
                                epoch_size=512,
                                max_epochs=2)

    # We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
    # tolerance back once convolution operator is determinsitic.

    #    expected_test_error = 0.282

    #    assert np.allclose(test_error, expected_test_error,
    #                       atol=TOLERANCE_ABSOLUTE)
    distributed.Communicator.finalize()
Example #43
0
def test_transfer_learning(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU') # due to batch normalization in ResNet_18
    set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        print("Reading data and model from %s" % extPath)
        model_file = os.path.join(extPath, *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))
        map_file = os.path.join(extPath, *"Image/CIFAR/v0/cifar-10-batches-py/test_map.txt".split("/"))
    else:
        model_file = os.path.join(base_path, *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))
        map_file = os.path.join(base_path, *"../../../../Examples/Image/DataSets/CIFAR-10/test_map.txt".split("/"))

    os.chdir(os.path.join(os.path.dirname(map_file), '..'))

    feature_node_name = "features"
    last_hidden_node_name = "z.x"
    image_width = 224
    image_height = 224
    num_channels = 3
    num_classes = 10

    num_epochs = 10
    num_train_images = 10
    num_test_images = 2

    output_file = os.path.join(base_path, "tl_output.txt")
    trained_model = train_model(model_file, feature_node_name, last_hidden_node_name,
                                image_width, image_height, num_channels, num_classes, map_file,
                                num_epochs=num_epochs, max_images=num_train_images, freeze=True)

    # since we do not use a reader for evaluation we need unzipped data
    grocery_path = prepare_Grocery_data()
    eval_map_file = os.path.join(grocery_path, "test.txt")
    os.chdir(grocery_path)
    eval_test_images(trained_model, output_file, eval_map_file, image_width, image_height,
                     max_images=num_test_images, column_offset=1)

    expected_output_file = os.path.join(base_path, "tl_expected_output.txt")
    output = np.fromfile(output_file)
    expected_output = np.fromfile(expected_output_file)
    assert np.allclose(output, expected_output, atol=TOLERANCE_ABSOLUTE)
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test  = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    test_error = convnet_cifar10_dataaug(reader_train, reader_test, epoch_size=256, max_epochs=1)
Example #45
0
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Datasets/CIFAR-10/cifar-10-batches-py".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))
    
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True)
    reader_test  = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False)

    # Create a path to TensorBoard log directory and make sure it does not exist.
    abs_path = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(abs_path, 'TrainResNet_CIFAR10_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    test_error = train_and_evaluate(reader_train, reader_test, 'resnet20', epoch_size=512, max_epochs=1,
                                    tensorboard_logdir=tb_logdir)

# We are removing tolerance in error because running small epoch size has huge variance in accuracy. Will add
# tolerance back once convolution operator is determinsitic. 
    
#    expected_test_error = 0.282

#    assert np.allclose(test_error, expected_test_error,
#                       atol=TOLERANCE_ABSOLUTE)

    files = 0
    for file in os.listdir(tb_logdir):
        assert file.startswith("events.out.tfevents")
        files += 1
    assert files == 1
def train_and_evaluate(data_path, total_epochs, gpu_count=1):
    # Create distributed communicator for 1-bit SGD for better scaling to multiple GPUs
    # If you'd like to avoid quantization loss, use simple one instead
    quantization_bit = 1

    if (quantization_bit == 32):
        communicator = distributed.mpi_communicator()
    else:
        communicator = distributed.quantized_mpi_communicator(quantization_bit)

    workers = communicator.workers()
    current_worker = communicator.current_worker()
    print("List all distributed workers")
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            print("* {} {}".format(wk.global_rank, wk.host_id))
        else:
            print("  {} {}".format(wk.global_rank, wk.host_id))

    if gpu_count == 1 and len(workers) > 1 :
        print("Warning: running distributed training on 1-GPU will be slow")
        device.set_default_device(gpu(0))

    print("Training on device type:{} id:{}".format('gpu' if device.default().type() else 'cpu', device.default().id()))

    start_model = "start_model.bin"
    num_start_epochs = 1
    num_parallel_epochs = total_epochs - num_start_epochs

    # training the start model only in one worker
    if communicator.current_worker().global_rank == 0:
        cifar_resnet_distributed(data_path, save_model_filename=start_model, communicator=None, run_test=False, num_epochs=num_start_epochs)
    
    communicator.barrier()
    
    # train in parallel
    error = cifar_resnet_distributed(data_path, load_model_filename=start_model, communicator=communicator, run_test=True, num_epochs=num_parallel_epochs)

    distributed.Communicator.finalize()
    return error
Example #48
0
def test_simple_mnist_error(device_id):
    # Create a path to TensorBoard log directory and make sure it does not exist.
    abs_path = os.path.dirname(os.path.abspath(__file__))
    tb_logdir = os.path.join(abs_path, 'simple_mnist_test_log')
    if os.path.exists(tb_logdir):
        shutil.rmtree(tb_logdir)

    from cntk.ops.tests.ops_test_utils import cntk_device
    set_default_device(cntk_device(device_id))

    test_error = simple_mnist(tb_logdir)
    expected_test_error = 0.09

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)

    # Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
    tb_files = 0
    for tb_file in os.listdir(tb_logdir):
        assert tb_file.startswith("events.out.tfevents")
        tb_files += 1
    assert tb_files == 1
def test_cifar_resnet_distributed_error(device_id, is_1bit_sgd):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    if not is_1bit_sgd:
        pytest.skip('test only runs in 1-bit SGD')

    try:
        base_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    distributed_learner_factory = lambda learner: distributed.data_parallel_distributed_learner(
        learner=learner,
        num_quantization_bits=32,
        distributed_after=0)

    reader_train_factory = lambda data_size: create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True, data_size)
    test_reader = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False, FULL_DATA_SWEEP)

    test_error = train_and_evaluate(reader_train_factory, test_reader, 'resnet20', 5, distributed_learner_factory)

    expected_test_error = 0.282

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
    distributed.Communicator.finalize()
Example #50
0
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Datasets/CIFAR-10/cifar-10-batches-py"
            .split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
    parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
    parser.add_argument('-e', '--epochs', help='Total number of epochs to train', type=int, required=False, default='160')
    parser.add_argument('-q', '--quantized_bits', help='Number of quantized bits used for gradient aggregation', type=int, required=False, default='32')
    parser.add_argument('-a', '--distributed_after', help='Number of samples to train with before running distributed', type=int, required=False, default='0')
    parser.add_argument('-ms', '--minibatch_size', help='Minibatch size', type=int, required=False, default='16')
    parser.add_argument('-b', '--block_samples', type=int, help="Number of samples per block for block momentum (BM) distributed learner (if 0 BM learner is not used)", required=False, default=None)
    parser.add_argument('-es', '--epoch_size', help='Epoch size', type=int, required=False, default='64')
    parser.add_argument('-r', '--randomize_data', help='Randomize training data', type=bool, required=False, default=False)
    parser.add_argument('-device', '--device', type=int, help="Force to run the script on a specified device", required=False, default=None)

    args = vars(parser.parse_args())

    if args['outputdir'] is not None:
        model_path = args['outputdir'] + "/models"
    if args['device'] is not None:
        set_default_device(gpu(args['device']))

    data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    train_data = os.path.join(data_path, 'cmudict-0.7b.train-dev-20-21.ctf')
    test_data = os.path.join(data_path, 'cmudict-0.7b.test.ctf')

    try:
        sequence_to_sequence_translator(train_data, test_data,
                                        epoch_size=args['epoch_size'],
                                        num_quantization_bits=args['quantized_bits'],
                                        block_size=args['block_samples'],
                                        warm_up=args['distributed_after'],
Example #52
0
                true_label = int(tokens[1 + column_offset])
                predicted_label = np.argmax(probs)
                if predicted_label == true_label:
                    correct_count += 1

                np.savetxt(results_file, probs[np.newaxis], fmt="%.3f")
                if pred_count % 100 == 0:
                    print("Processed {0} samples ({1} correct)".format(pred_count, (correct_count / pred_count)))
                if pred_count >= num_images:
                    break

    print ("{0} of {1} prediction were correct {2}.".format(correct_count, pred_count, (correct_count / pred_count)))


if __name__ == '__main__':
    set_default_device(gpu(0))
    # check for model and data existence
    if not (os.path.exists(_base_model_file) and os.path.exists(_train_map_file) and os.path.exists(_test_map_file)):
        print("Please run 'python install_data_and_model.py' first to get the required data and model.")
        exit(0)

    # You can use the following to inspect the base model and determine the desired node names
    # node_outputs = get_node_outputs(load_model(_base_model_file))
    # for out in node_outputs: print("{0} {1}".format(out.name, out.shape))

    # Train only if no model exists yet or if make_mode is set to False
    if os.path.exists(tl_model_file) and make_mode:
        print("Loading existing model from %s" % tl_model_file)
        trained_model = load_model(tl_model_file)
    else:
        trained_model = train_model(_base_model_file, _feature_node_name, _last_hidden_node_name,
    except KeyError:
        base_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                *"../../../../Examples/Image/DataSets/CIFAR-10".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    set_computation_network_trace_level(1) 
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    #force_deterministic_algorithms()
    # TODO: do the above; they lead to slightly different results, so not doing it for now

    create_train_reader = lambda data_size: create_reader(os.path.join(base_path, 'train_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), True, data_size, 0)
    test_reader = create_reader(os.path.join(base_path, 'test_map.txt'), os.path.join(base_path, 'CIFAR-10_mean.xml'), False, FULL_DATA_SWEEP)

    distributed_after_samples = 0
    num_quantization_bits = 32
    create_dist_learner = lambda learner: distributed.data_parallel_distributed_learner(
        learner=learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=distributed_after_samples)

    return convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learner, max_epochs=1, num_mbs_per_log=None)

if __name__=='__main__':
    assert distributed.Communicator.rank() < distributed.Communicator.num_workers()
    set_default_device(gpu(0)) # force using GPU-0 in test for speed
    run_cifar_convnet_distributed()
    distributed.Communicator.finalize()