def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train1 = create_reader(os.path.join(base_path, 'train_map.txt'),
                                  os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    reader_test1 = create_reader(os.path.join(base_path, 'test_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model1 = create_convnet_cifar10_model(num_classes=10)
    train_loss1 = train_model(reader_train1, reader_test1, model1,
                              epoch_size=128, max_epochs=1)
def test_bn_inception_cifar(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    mean_data = os.path.join(base_path, 'CIFAR-10_mean.xml')
    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'test_map.txt')

    try:
        error = bn_inception_train_and_eval(train_data, test_data, mean_data,
                                            minibatch_size=16, epoch_size=500,
                                            max_epochs=8, restore=False,
                                            testing_parameters=(500, 16))
    finally:
        os.chdir(current_path)

    expected_error = 0.88
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
def test_inception_v3_imagenet(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    current_path = os.getcwd()
    base_path = prepare_ImageNet_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    train_data = os.path.join(base_path, 'train_map.txt')
    test_data = os.path.join(base_path, 'val_map.txt')

    try:
        error = inception_v3_train_and_eval(train_data, test_data,
                                            minibatch_size=8, epoch_size=200,
                                            max_epochs=4, restore=False,
                                            testing_parameters=(200, 8))
    finally:
        os.chdir(current_path)

    expected_error = 0.99
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
def test_cifar_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_convnet_cifar10_model(num_classes=10)
    model.update_signature((num_channels, image_height, image_width))
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    train_loss, metric = train_model(reader_train, model, criterion,
                                     epoch_size=128, max_epochs=5)

    expected_loss_metric = (2.2963, 0.9062)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(cfg.DATA.MAP_FILE_PATH, "results",
                                   datetime.now().strftime("%d-%m-%Y-%H-%M"))
    # cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if not running_locally:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL,
                                             "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage",
                                             cfg.DATA.DATASET))
    cfg['BASE_MODEL_PATH'] = os.path.join(cfg['DATA'].MAP_FILE_PATH, "pretrained-models",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1
        cfg["CNTK"].RPN_EPOCHS = 1
        cfg["CNTK"].FRCN_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)

    if False and cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS))
        print("Random seed      : {}".format(cfg.RND_SEED))
        print("Momentum per MB  : {}".format(cfg["CNTK"].MOMENTUM_PER_MB))
        if cfg["CNTK"].TRAIN_E2E:
            print("E2E epochs       : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS))
        else:
            print("RPN lr factor    : {}".format(cfg["CNTK"].RPN_LR_FACTOR))
            print("RPN epochs       : {}".format(cfg["CNTK"].RPN_EPOCHS))
            print("FRCN lr factor   : {}".format(cfg["CNTK"].FRCN_LR_FACTOR))
            print("FRCN epochs      : {}".format(cfg["CNTK"].FRCN_EPOCHS))
def test_binary_convnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    train_loss, metric = train_model(reader_train, z, criterion,
                                     epoch_size=8192, max_epochs=5)

    expected_loss_metric = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

    # save and load (as an illustration)
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # test
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    test_loss, metric = evaluate(reader_test, criterion, device=eval_device,
                                 minibatch_size=1, max_samples=200)

    expected_loss_metric = (0.0, 0.695)
    assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
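# The save/load step above is the generic round trip for moving a trained model
# onto another device for evaluation. A minimal standalone sketch of that step,
# assuming only an already-trained CNTK Function named `model` (the variable
# name is illustrative, not from the tests above):

import cntk as C

# Hedged sketch: serialize a trained C.Function and reload it on the CPU,
# mirroring the save/load step in the test above.
model_path = "model.cmf"
model.save(model_path)                                # write CNTK model format
loaded = C.Function.load(model_path, device=C.cpu())  # reload for CPU-side evaluation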
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")
    cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)

    if cfg.USE_PRECOMPUTED_PROPOSALS:
        try:
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE = os.path.join(
                data_path, cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE)
        except:
            print("To use precomputed proposals please specify the following parameters in your configuration:\n"
                  "__C.DATA.TRAIN_PRECOMPUTED_PROPOSALS_FILE\n"
                  "__C.DATA.TEST_PRECOMPUTED_PROPOSALS_FILE")
            exit(-1)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH,
                                     "fast_rcnn_eval_{}.model".format(cfg["MODEL"].BASE_MODEL))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                          "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].MAX_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)
def force_deterministic(seed):
    '''
    Force most of the computation nodes to run deterministically.

    Args:
        seed (int): set the random seed for all random ops in the graph and readers.
    '''
    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    import warnings
    warnings.warn("RNN based nodes don't run deterministically yet.", Warning)
    set_fixed_random_seed(seed)
    force_deterministic_algorithms()
def force_deterministic(seed):
    '''
    Force most of the computation nodes to run deterministically.

    Args:
        seed (int): set the random seed for all random ops in the graph and readers.
    '''
    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    import warnings
    warnings.warn("pooling nodes and RNN based nodes don't run deterministically yet.", Warning)
    set_fixed_random_seed(seed)
    force_deterministic_algorithms()
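# Both variants of this wrapper reduce to the same two low-level calls that the
# tests above repeat inline. For reference, a minimal sketch of that preamble,
# using exactly the calls shown in the snippets in this file:

from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms

set_fixed_random_seed(1)          # one seed for all random ops in the graph and readers
force_deterministic_algorithms()  # prefer deterministic (possibly slower) kernels
# ... build and train; repeated runs should now match, except for the node
# types called out in the warnings above (e.g. pooling and RNN-based nodes).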
def test_binary_convnet_error(device_id):
    if not native_convolve_function_registered:
        pytest.skip("Could not find {0} library. "
                    "Please check if HALIDE_PATH is configured properly "
                    "and try building {1} again"
                    .format('Cntk.BinaryConvolution-' + C.__version__.rstrip('+'),
                            'Extensibility\\BinaryConvolution'))

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    try_set_default_device(cntk_device(device_id))

    base_path = prepare_CIFAR10_data()
    # change dir to locate data.zip correctly
    os.chdir(base_path)

    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    reader_train = create_reader(os.path.join(base_path, 'train_map.txt'),
                                 os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    model = create_binary_convolution_model()
    z, criterion = get_z_and_criterion(model)
    train_loss, metric = train_model(reader_train, z, criterion,
                                     epoch_size=8192, max_epochs=5)

    expected_loss_metric = (2.677057718858123, 0.6043701171875)
    assert np.allclose((train_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)

    # save and load (as an illustration)
    model_path = "model.cmf"
    model.save(model_path)
    eval_device = C.cpu()
    model = Function.load(model_path, device=eval_device)

    # test
    model_with_native_binary_convolutions = clone_with_native_binary_convolutions(model)
    _, criterion = get_z_and_criterion(model_with_native_binary_convolutions)
    reader_test = create_reader(os.path.join(base_path, 'test_map.txt'),
                                os.path.join(base_path, 'CIFAR-10_mean.xml'), False)
    test_loss, metric = evaluate(reader_test, criterion, device=eval_device,
                                 minibatch_size=1, max_samples=200)

    expected_loss_metric = (0.0, 0.695)
    assert np.allclose((test_loss, metric), expected_loss_metric, atol=TOLERANCE_ABSOLUTE)
def prepare(cfg, use_arg_parser=True):
    cfg.MB_SIZE = 1
    cfg.NUM_CHANNELS = 3
    cfg.OUTPUT_PATH = os.path.join(abs_path, "Output")
    cfg["DATA"].MAP_FILE_PATH = os.path.join(abs_path, cfg["DATA"].MAP_FILE_PATH)
    running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH)
    if running_locally:
        os.chdir(cfg["DATA"].MAP_FILE_PATH)
        if not os.path.exists(os.path.join(abs_path, "Output")):
            os.makedirs(os.path.join(abs_path, "Output"))
        if not os.path.exists(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)):
            os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET))
    else:
        # disable debug and plot outputs when running on GPU cluster
        cfg["CNTK"].DEBUG_OUTPUT = False
        cfg.VISUALIZE_RESULTS = False

    if use_arg_parser:
        parse_arguments(cfg)

    data_path = cfg["DATA"].MAP_FILE_PATH
    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE)
    cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE)
    cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE)
    cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE)

    cfg['MODEL_PATH'] = os.path.join(cfg.OUTPUT_PATH, "faster_rcnn_eval_{}_{}.model"
                                     .format(cfg["MODEL"].BASE_MODEL,
                                             "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"))
    cfg['BASE_MODEL_PATH'] = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                          "..", "..", "..", "..", "PretrainedModels",
                                          cfg["MODEL"].BASE_MODEL_FILE)

    cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE)
    cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES)

    if cfg["CNTK"].FAST_MODE:
        cfg["CNTK"].E2E_MAX_EPOCHS = 1
        cfg["CNTK"].RPN_EPOCHS = 1
        cfg["CNTK"].FRCN_EPOCHS = 1

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=cfg.RND_SEED)

    if False and cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS))
        print("Random seed      : {}".format(cfg.RND_SEED))
        print("Momentum per MB  : {}".format(cfg["CNTK"].MOMENTUM_PER_MB))
        if cfg["CNTK"].TRAIN_E2E:
            print("E2E epochs       : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS))
        else:
            print("RPN lr factor    : {}".format(cfg["CNTK"].RPN_LR_FACTOR))
            print("RPN epochs       : {}".format(cfg["CNTK"].RPN_EPOCHS))
            print("FRCN lr factor   : {}".format(cfg["CNTK"].FRCN_LR_FACTOR))
            print("FRCN epochs      : {}".format(cfg["CNTK"].FRCN_EPOCHS))
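# For context, a sketch of how a prepare() variant like the ones above is
# typically driven from an example script. get_configuration and
# train_faster_rcnn are assumed names standing in for whatever config-merging
# and training entry points the surrounding example provides:

cfg = get_configuration()               # assumption: merge detector/network/dataset configs
prepare(cfg, use_arg_parser=False)      # resolve paths, epoch counts, determinism as above
trained_model = train_faster_rcnn(cfg)  # assumption: the example's training entry point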
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================

import numpy as np
import os
import pytest
import sys

from cntk import load_model
from cntk.cntk_py import DeviceKind_GPU
from cntk.device import try_set_default_device, gpu
from cntk.logging.graph import get_node_outputs
from cntk.ops.tests.ops_test_utils import cntk_device
from _cntk_py import force_deterministic_algorithms
force_deterministic_algorithms()

abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(abs_path)
sys.path.append(os.path.join(abs_path, "..", "..", "..", "..",
                             "Examples", "Image", "Detection", "FasterRCNN"))

from prepare_test_data import prepare_Grocery_data, prepare_alexnet_v0_model
grocery_path = prepare_Grocery_data()
prepare_alexnet_v0_model()

from install_data_and_model import create_grocery_mappings
create_grocery_mappings(grocery_path)

win35_linux34 = pytest.mark.skipif(
def test_transfer_learning(device_id):
    set_fixed_random_seed(1)
    force_deterministic_algorithms()

    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')  # due to batch normalization in ResNet_18
    try_set_default_device(cntk_device(device_id))

    base_path = os.path.dirname(os.path.abspath(__file__))
    animals_path = os.path.join(base_path,
                                *"../../../../Examples/Image/DataSets/Animals".split("/"))
    externalData = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY' in os.environ
    if externalData:
        extPath = os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY']
        model_file = os.path.join(extPath,
                                  *"PreTrainedModels/ResNet/v1/ResNet_18.model".split("/"))
        if not os.path.isfile(os.path.join(animals_path, 'Test', 'Weaver_bird.jpg')):
            # copy data from external test data location and unzip
            os.chdir(os.path.join(base_path, '..', '..', '..'))
            prepare_animals_data()
            os.chdir(base_path)
            zip_path = os.path.join(animals_path, 'Animals.zip')
            with zipfile.ZipFile(zip_path) as myzip:
                myzip.extractall(os.path.join(animals_path, '..'))
    else:
        model_file = os.path.join(base_path,
                                  *"../../../../Examples/Image/PretrainedModels/ResNet_18.model".split("/"))

    train_image_folder = os.path.join(animals_path, "Train")
    test_image_folder = os.path.join(animals_path, "Test")
    output_file = os.path.join(base_path, "tl_extended_output.txt")

    train_and_eval(model_file, train_image_folder, test_image_folder,
                   output_file, None, testing=True)

    expected_output_file = os.path.join(base_path, "tl_extended_expected_output.txt")
    with open(output_file) as output_json:
        output_lines = output_json.readlines()
    with open(expected_output_file) as expected_output_json:
        expected_output_lines = expected_output_json.readlines()

    # handling different ordering of files
    out_dict = {}
    exp_dict = {}
    for i in range(len(output_lines)):
        output = json.loads(output_lines[i])[0]
        expected_output = json.loads(expected_output_lines[i])[0]
        out_dict[output["image"]] = output
        exp_dict[expected_output["image"]] = expected_output

    # debug output
    for k in out_dict:
        output = out_dict[k]
        expected_output = exp_dict[k]
        print("output: {}".format(output))
        print("expect: {}".format(expected_output))

    for k in out_dict:
        output = out_dict[k]
        expected_output = exp_dict[k]
        assert np.allclose(output["predictions"]["Sheep"],
                           expected_output["predictions"]["Sheep"],
                           atol=TOLERANCE_ABSOLUTE)
        assert np.allclose(output["predictions"]["Wolf"],
                           expected_output["predictions"]["Wolf"],
                           atol=TOLERANCE_ABSOLUTE)
def set_global_vars(use_arg_parser=True):
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E
    globalvars['fea_map_dim'] = cfg["CNTK"].FEA_MAP_DIM

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-datadir', '--datadir',
                            help='Data directory where the ImageNet dataset is located',
                            required=False, default=data_path)
        parser.add_argument('-outputdir', '--outputdir',
                            help='Output directory for checkpoints and models',
                            required=False, default=None)
        parser.add_argument('-logdir', '--logdir', help='Log file',
                            required=False, default=None)
        parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train',
                            type=int, required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
        parser.add_argument('-m', '--minibatch_size', help='Minibatch size',
                            type=int, required=False, default=mb_size)
        parser.add_argument('-e', '--epoch_size', help='Epoch size',
                            type=int, required=False, default=epoch_size)
        parser.add_argument('-q', '--quantized_bits',
                            help='Number of quantized bits used for gradient aggregation',
                            type=int, required=False, default='32')
        parser.add_argument('-r', '--restart',
                            help='Indicating whether to restart from scratch (instead of restart from checkpoint file by default)',
                            action='store_true')
        parser.add_argument('-device', '--device', type=int,
                            help="Force to run the script on a specified device",
                            required=False, default=None)
        parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float,
                            help="Scale factor for rpn lr schedule", required=False)
        parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float,
                            help="Scale factor for frcn lr schedule", required=False)
        parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float,
                            help="Scale factor for e2e lr schedule", required=False)
        parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float,
                            help="momentum per minibatch", required=False)
        parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int,
                            help="number of epochs for e2e training", required=False)
        parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int,
                            help="number of epochs for rpn training", required=False)
        parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int,
                            help="number of epochs for frcn training", required=False)
        parser.add_argument('-rndSeed', '--rndSeed', type=int,
                            help="the random seed", required=False)
        parser.add_argument('-trainConv', '--trainConv', type=int,
                            help="whether to train conv layers", required=False)
        parser.add_argument('-trainE2E', '--trainE2E', type=int,
                            help="whether to train e2e (otherwise 4 stage)", required=False)
        args = vars(parser.parse_args())

        if args['rpnLrFactor'] is not None:
            globalvars['rpn_lr_factor'] = args['rpnLrFactor']
        if args['frcnLrFactor'] is not None:
            globalvars['frcn_lr_factor'] = args['frcnLrFactor']
        if args['e2eLrFactor'] is not None:
            globalvars['e2e_lr_factor'] = args['e2eLrFactor']
        if args['momentumPerMb'] is not None:
            globalvars['momentum_per_mb'] = args['momentumPerMb']
        if args['e2eEpochs'] is not None:
            globalvars['e2e_epochs'] = args['e2eEpochs']
        if args['rpnEpochs'] is not None:
            globalvars['rpn_epochs'] = args['rpnEpochs']
        if args['frcnEpochs'] is not None:
            globalvars['frcn_epochs'] = args['frcnEpochs']
        if args['rndSeed'] is not None:
            globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = True if args['trainConv'] == 1 else False
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = True if args['trainE2E'] == 1 else False
        if args['outputdir'] is not None:
            globalvars['output_path'] = args['outputdir']
        if args['logdir'] is not None:
            log_dir = args['logdir']
        if args['device'] is not None:
            # Setting one worker on GPU and one worker on CPU.
            # Otherwise memory consumption is too high for a single GPU.
            if Communicator.rank() == 0:
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())
        if args['datadir'] is not None:
            data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))
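# A hypothetical invocation showing how these flags override the config-driven
# defaults. The script name is an assumption; the flags are the ones registered
# by the parser above:
#
#   python FasterRCNN.py -datadir /path/to/dataset -trainE2E 1 -e2eEpochs 2 -rndSeed 1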
        t += trainer.previous_minibatch_sample_count             # count samples processed so far
        progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        #def trace_node(name):
        #    nl = [n for n in z.parameters if n.name() == name]
        #    if len(nl) > 0:
        #        print(name, np.asarray(nl[0].value))
        #trace_node('W')
        #trace_node('stabilizer_param')
    loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric

#############################
# main function boilerplate #
#############################

if __name__ == '__main__':
    # TODO: leave these in for now as debugging aids; remove for beta
    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)  # TODO: remove debugging facilities once this all works
    set_fixed_random_seed(1)  # BUGBUG: has no effect at present  # TODO: remove debugging facilities once this all works
    force_deterministic_algorithms()

    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()

    # train
    train(reader, model, max_epochs=8)

    # test (TODO)
    reader = create_reader(data_dir + "/atis.test.ctf")
    #test(reader, model_dir + "/slu.cmf")  # TODO: what is the correct pattern here?
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.try_set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training
    force_deterministic_algorithms()

    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # change to intent classifier --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model, with shared BN
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducible aggregation
        # ^^ This test requires the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])
        # values with normalization_time_constant=-1 and double precision:
        # [0.0583178503091983, 0.3199431143304898]
        """
        with normalization_time_constant=-1:
        Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
        Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
        Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
        Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
        Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
        Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
        Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
        Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
        Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
        Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
        Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
        Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
        Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
        Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
        Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
        Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
        --> 0.057818696098277916 0.3214128415043278
        Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
        Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
        Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
        Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
        Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
        Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
        Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
        Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
        Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
        Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
        Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
        --> 0.03159140568099053 0.0
        """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])
        """
        Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
        Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
        Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
        Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
        Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
        Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
        Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
        Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
        Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
        Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
        Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
        Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
        Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
        Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
        Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
        Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
        --> 0.05662627214996811 0.2968516879905391
        Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
        Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
        Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
        Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
        Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
        Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
        Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
        Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
        Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
        Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
        Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
        --> 0.035050983248361256 0.0
        """

        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim + 50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(num_labels)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0:  # sparse FSAdagrad currently does not run on CPU --TODO: fix this test once it does
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        expected_avg = [0.09698114255561419, 0.5290531086061565]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update example to support tensorboard, or decide to not show it in all examples (in upcoming update of examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)  #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
def set_global_vars(use_arg_parser=True):
    global globalvars
    global image_width
    global image_height
    global dims_input_const
    global img_pad_value
    global normalization_const
    global map_file_path
    global epoch_size
    global num_test_images
    global model_folder
    global base_model_file
    global feature_node_name
    global last_conv_node_name
    global start_train_conv_node_name
    global pool_node_name
    global last_hidden_node_name
    global roi_dim
    global prediction
    global prediction_in
    global prediction_out

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-c', '--config', help='Configuration file in YAML format',
                            required=False, default=None)
        parser.add_argument('-t', '--device_type', type=str,
                            help="The type of the device (cpu|gpu)",
                            required=False, default="cpu")
        parser.add_argument('-d', '--device', type=int,
                            help="Force to run the script on a specified device",
                            required=False, default=None)
        parser.add_argument('-l', '--list_devices', action='store_true',
                            help="Lists the available devices and exits",
                            required=False, default=False)
        parser.add_argument('--prediction', action='store_true',
                            help="Switches to prediction mode",
                            required=False, default=False)
        parser.add_argument('--prediction_in', action='append', type=str,
                            help="The input directory for images in prediction mode. Can be supplied multiple times.",
                            required=False, default=list())
        parser.add_argument('--prediction_out', action='append', type=str,
                            help="The output directory for processed images and predictions in prediction mode. Can be supplied multiple times.",
                            required=False, default=list())
        parser.add_argument('--no_headers', action='store_true',
                            help="Whether to suppress the header row in the ROI CSV files",
                            required=False, default=False)
        parser.add_argument('--output_width_height', action='store_true',
                            help="Whether to output width/height instead of second x/y in the ROI CSV files",
                            required=False, default=False)
        parser.add_argument('--suppressed_labels', type=str,
                            help="Comma-separated list of labels to suppress from being output in ROI CSV files.",
                            required=False, default="")
        args = vars(parser.parse_args())

        # prediction mode?
        prediction = args['prediction']
        if prediction:
            prediction_in = args['prediction_in']
            if len(prediction_in) == 0:
                raise RuntimeError("No prediction input directory provided!")
            for p in prediction_in:
                if not os.path.exists(p):
                    raise RuntimeError("Prediction input directory '%s' does not exist" % p)
            prediction_out = args['prediction_out']
            if len(prediction_out) == 0:
                raise RuntimeError("No prediction output directory provided!")
            for p in prediction_out:
                if not os.path.exists(p):
                    raise RuntimeError("Prediction output directory '%s' does not exist" % p)
            if len(prediction_in) != len(prediction_out):
                raise RuntimeError("Number of input and output directories don't match: %i != %i"
                                   % (len(prediction_in), len(prediction_out)))
            for i in range(len(prediction_in)):
                if prediction_in[i] == prediction_out[i]:
                    raise RuntimeError("Input and output directories #%i for prediction are the same: %s"
                                       % ((i + 1), prediction_in[i]))

        if args['list_devices']:
            print("Available devices (Type - ID - description)")
            for d in cntk.device.all_devices():
                if d.type() == 0:
                    type = "cpu"
                elif d.type() == 1:
                    type = "gpu"
                else:
                    type = "<unknown:" + str(d.type()) + ">"
                print(type + " - " + str(d.id()) + " - " + str(d))
            sys.exit(0)
        if args['config'] is not None:
            cfg_from_file(args['config'])
        if args['device'] is not None:
            if args['device_type'] == 'gpu':
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

    image_width = cfg["CNTK"].IMAGE_WIDTH
    image_height = cfg["CNTK"].IMAGE_HEIGHT

    # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
    dims_input_const = MinibatchData(
        Value(batch=np.asarray([image_width, image_height, image_width,
                                image_height, image_width, image_height],
                               dtype=np.float32)), 1, 1, False)

    # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
    img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
    normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" \
        else Constant([[[114]], [[114]], [[114]]])

    # dataset specific parameters
    map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
    globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
    globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
    globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
    globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
    globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
    globalvars['output_path'] = cfg["CNTK"].OUTPUT_PATH
    epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
    num_test_images = cfg["CNTK"].NUM_TEST_IMAGES

    # model specific parameters
    if cfg["CNTK"].PRETRAINED_MODELS.startswith(".."):
        model_folder = os.path.join(abs_path, cfg["CNTK"].PRETRAINED_MODELS)
    else:
        model_folder = cfg["CNTK"].PRETRAINED_MODELS
    base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
    feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
    start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
    pool_node_name = cfg["CNTK"].POOL_NODE_NAME
    last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
    roi_dim = cfg["CNTK"].ROI_DIM

    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)
    globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])
    globalvars['headers'] = not args['no_headers']
    globalvars['output_width_height'] = args['output_width_height']
    suppressed_labels = []
    if len(args['suppressed_labels']) > 0:
        suppressed_labels = args['suppressed_labels'].split(",")
    globalvars['suppressed_labels'] = suppressed_labels

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))
def prepare(cfg, use_arg_parser=True): cfg.MB_SIZE = 1 cfg.NUM_CHANNELS = 3 cfg.OUTPUT_PATH = os.path.join(abs_path, "Output") data_path = cfg["DATA"].MAP_FILE_PATH if load_file_from_blob(cfg["AZURE"].ACCOUNT_NAME, \ cfg["AZURE"].DATA, cfg["DATA"].DATASET +".zip", data_path+".zip" ) is True: unzip_file(data_path + ".zip", data_path) if not os.path.isdir(data_path): raise RuntimeError("Directory %s does not exist" % data_path) running_locally = os.path.exists(cfg["DATA"].MAP_FILE_PATH) if running_locally: os.chdir(cfg["DATA"].MAP_FILE_PATH) if not os.path.exists(os.path.join(abs_path, "Output")): os.makedirs(os.path.join(abs_path, "Output")) if not os.path.exists( os.path.join(abs_path, "Output", cfg["DATA"].DATASET)): os.makedirs(os.path.join(abs_path, "Output", cfg["DATA"].DATASET)) else: # disable debug and plot outputs when running on GPU cluster cfg["CNTK"].DEBUG_OUTPUT = False cfg.VISUALIZE_RESULTS = False if use_arg_parser: parse_arguments(cfg) cfg["DATA"].CLASS_MAP_FILE = os.path.join(data_path, cfg["DATA"].CLASS_MAP_FILE) cfg["DATA"].TRAIN_MAP_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_MAP_FILE) cfg["DATA"].TEST_MAP_FILE = os.path.join(data_path, cfg["DATA"].TEST_MAP_FILE) cfg["DATA"].TRAIN_ROI_FILE = os.path.join(data_path, cfg["DATA"].TRAIN_ROI_FILE) cfg["DATA"].TEST_ROI_FILE = os.path.join(data_path, cfg["DATA"].TEST_ROI_FILE) cfg['OUTPUT_MODEL_NAME'] = "faster_rcnn_eval_{}_{}.model".format( cfg["MODEL"].BASE_MODEL, "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage") cfg['MODEL_PATH'] = os.path.join( os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'], "output", cfg['OUTPUT_MODEL_NAME']) cfg['BASE_MODEL_PATH'] = os.path.join( os.environ['AZUREML_NATIVE_SHARE_DIRECTORY'], "base_model") cfg["DATA"].CLASSES = parse_class_map_file(cfg["DATA"].CLASS_MAP_FILE) cfg["DATA"].NUM_CLASSES = len(cfg["DATA"].CLASSES) if cfg["CNTK"].FAST_MODE: cfg["CNTK"].E2E_MAX_EPOCHS = 1 cfg["CNTK"].RPN_EPOCHS = 1 cfg["CNTK"].FRCN_EPOCHS = 1 if cfg["CNTK"].FORCE_DETERMINISTIC: force_deterministic_algorithms() np.random.seed(seed=cfg.RND_SEED) print("Downloading base model {}".format(cfg["MODEL"].BASE_MODEL_FILE)) print("PRETRAINED_MODELS {} ".format(cfg["AZURE"].PRETRAINED_MODELS)) print("BASE_MODEL_FILE {}".format(cfg["MODEL"].BASE_MODEL_FILE)) cfg['BASE_MODEL_PATH'] = cfg['BASE_MODEL_PATH'] + '_' + cfg[ "MODEL"].BASE_MODEL_FILE print("BASE_MODEL_PATH {}".format(cfg['BASE_MODEL_PATH'])) # For testing to make sure this works when we don't have any stored model in shared directory # os.remove(cfg['BASE_MODEL_PATH']) load_file_from_blob(cfg["AZURE"].ACCOUNT_NAME, \ cfg["AZURE"].PRETRAINED_MODELS, cfg["MODEL"].BASE_MODEL_FILE, cfg['BASE_MODEL_PATH'] ) if False and cfg["CNTK"].DEBUG_OUTPUT: # report args print("Using the following parameters:") print("Flip image : {}".format(cfg["TRAIN"].USE_FLIPPED)) print("Train conv layers: {}".format(cfg.TRAIN_CONV_LAYERS)) print("Random seed : {}".format(cfg.RND_SEED)) print("Momentum per MB : {}".format(cfg["CNTK"].MOMENTUM_PER_MB)) if cfg["CNTK"].TRAIN_E2E: print("E2E epochs : {}".format(cfg["CNTK"].E2E_MAX_EPOCHS)) else: print("RPN lr factor : {}".format(cfg["CNTK"].RPN_LR_FACTOR)) print("RPN epochs : {}".format(cfg["CNTK"].RPN_EPOCHS)) print("FRCN lr factor : {}".format(cfg["CNTK"].FRCN_LR_FACTOR)) print("FRCN epochs : {}".format(cfg["CNTK"].FRCN_EPOCHS))
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.try_set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training.
    force_deterministic_algorithms()

    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # change to intent classifier --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model, with shared BN
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            #with default_options(dtype=np.float64):  # test this with double precision, since single precision is too little for reproducible aggregation
            # ^^ This test requires the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """
            with normalization_time_constant=-1:
            Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
            Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
            Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
            Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
            Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
            Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
            Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
            Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
            Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
            Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
            Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
            Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
            Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
            Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
            Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
            Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
            Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
            Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
            Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
            Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
            Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
            Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
            Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
            Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
            Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])
            """
            Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
            Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
            Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
            Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
            Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
            Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
            Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
            Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
            Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
            Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
            Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
            Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
            Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
            Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
            Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
            Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
            Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
            Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
            Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
            Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
            Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
            Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
            Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
            Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
            Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """

        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(num_labels)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0:  # sparse FSAdagrad currently does not run on CPU --TODO: fix this test once it does
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        expected_avg = [0.09698114255561419, 0.5290531086061565]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # Create a path to the TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update the example to support TensorBoard, or decide not to show it in all examples (in the upcoming update of the examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)  #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
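# The determinism preamble repeated across these tests, pulled out as a sketch: a
# helper like the hypothetical `make_deterministic` below would avoid restating
# the same calls in every test. The `_cntk_py` imports are the same internals the
# tests above use; seeding numpy alongside mirrors what the Faster R-CNN helpers do.
def make_deterministic(seed=1):
    from _cntk_py import set_fixed_random_seed, force_deterministic_algorithms
    import numpy as np
    set_fixed_random_seed(seed)       # fixes CNTK's internal RNG
    force_deterministic_algorithms()  # disables non-deterministic kernels
    np.random.seed(seed)              # keeps numpy-side randomness in step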
def set_global_vars(use_arg_parser=True):
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-datadir', '--datadir',
                            help='Data directory where the ImageNet dataset is located',
                            required=False, default=data_path)
        parser.add_argument('-outputdir', '--outputdir',
                            help='Output directory for checkpoints and models',
                            required=False, default=None)
        parser.add_argument('-logdir', '--logdir', help='Log file', required=False, default=None)
        parser.add_argument('-n', '--num_epochs', help='Total number of epochs to train',
                            type=int, required=False, default=cfg["CNTK"].E2E_MAX_EPOCHS)
        parser.add_argument('-m', '--minibatch_size', help='Minibatch size',
                            type=int, required=False, default=mb_size)
        parser.add_argument('-e', '--epoch_size', help='Epoch size',
                            type=int, required=False, default=epoch_size)
        parser.add_argument('-q', '--quantized_bits',
                            help='Number of quantized bits used for gradient aggregation',
                            type=int, required=False, default=32)
        parser.add_argument('-r', '--restart',
                            help='Restart from scratch (instead of restarting from the checkpoint file, the default)',
                            action='store_true')
        parser.add_argument('-device', '--device', type=int,
                            help='Force the script to run on a specified device',
                            required=False, default=None)
        parser.add_argument('-rpnLrFactor', '--rpnLrFactor', type=float,
                            help='Scale factor for the rpn lr schedule', required=False)
        parser.add_argument('-frcnLrFactor', '--frcnLrFactor', type=float,
                            help='Scale factor for the frcn lr schedule', required=False)
        parser.add_argument('-e2eLrFactor', '--e2eLrFactor', type=float,
                            help='Scale factor for the e2e lr schedule', required=False)
        parser.add_argument('-momentumPerMb', '--momentumPerMb', type=float,
                            help='Momentum per minibatch', required=False)
        parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int,
                            help='Number of epochs for e2e training', required=False)
        parser.add_argument('-rpnEpochs', '--rpnEpochs', type=int,
                            help='Number of epochs for rpn training', required=False)
        parser.add_argument('-frcnEpochs', '--frcnEpochs', type=int,
                            help='Number of epochs for frcn training', required=False)
        parser.add_argument('-rndSeed', '--rndSeed', type=int, help='The random seed', required=False)
        parser.add_argument('-trainConv', '--trainConv', type=int,
                            help='Whether to train the conv layers', required=False)
        parser.add_argument('-trainE2E', '--trainE2E', type=int,
                            help='Whether to train e2e (otherwise 4-stage)', required=False)

        args = vars(parser.parse_args())

        # Any flag that was actually supplied overrides the config default.
        if args['rpnLrFactor'] is not None:
            globalvars['rpn_lr_factor'] = args['rpnLrFactor']
        if args['frcnLrFactor'] is not None:
            globalvars['frcn_lr_factor'] = args['frcnLrFactor']
        if args['e2eLrFactor'] is not None:
            globalvars['e2e_lr_factor'] = args['e2eLrFactor']
        if args['momentumPerMb'] is not None:
            globalvars['momentum_per_mb'] = args['momentumPerMb']
        if args['e2eEpochs'] is not None:
            globalvars['e2e_epochs'] = args['e2eEpochs']
        if args['rpnEpochs'] is not None:
            globalvars['rpn_epochs'] = args['rpnEpochs']
        if args['frcnEpochs'] is not None:
            globalvars['frcn_epochs'] = args['frcnEpochs']
        if args['rndSeed'] is not None:
            globalvars['rnd_seed'] = args['rndSeed']
        if args['trainConv'] is not None:
            globalvars['train_conv'] = args['trainConv'] == 1
        if args['trainE2E'] is not None:
            globalvars['train_e2e'] = args['trainE2E'] == 1
        if args['outputdir'] is not None:
            globalvars['output_path'] = args['outputdir']
        if args['logdir'] is not None:
            log_dir = args['logdir']
        if args['device'] is not None:
            # Set one worker on the GPU and one worker on the CPU; otherwise memory consumption is too high for a single GPU.
            if Communicator.rank() == 0:
                cntk.device.try_set_default_device(cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())
        if args['datadir'] is not None:
            data_path = args['datadir']

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)

    globalvars['class_map_file'] = os.path.join(data_path, globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path, globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path, globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path, globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path, globalvars['test_roi_file'])

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])

    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))
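# A minimal, runnable sketch of the override pattern `set_global_vars` applies to
# every flag: the config supplies the default, and a CLI value wins only when the
# flag was actually passed (argparse leaves it at None otherwise). The helper name
# `resolve_epochs` and the default of 20 are illustrative assumptions, not part of
# the original code.
import argparse

def resolve_epochs(cli_args, config_default=20):
    parser = argparse.ArgumentParser()
    parser.add_argument('-e2eEpochs', '--e2eEpochs', type=int, required=False)
    args = vars(parser.parse_args(cli_args))
    # None means the flag was omitted, so the config default stands.
    return args['e2eEpochs'] if args['e2eEpochs'] is not None else config_default

assert resolve_epochs([]) == 20                    # no flag: config default
assert resolve_epochs(['--e2eEpochs', '3']) == 3   # flag supplied: CLI wins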