def __init__(self, model_fn, gpuid):
    """Pin the process to one GPU, initialize CNTK on it, and load the model.

    Args:
        model_fn: Path to the serialized CNTK model file.
        gpuid: Physical GPU index to expose via CUDA_VISIBLE_DEVICES.
    """
    # The environment variables must be set BEFORE cntk is imported, which is
    # why the import is deferred to inside this method: the library then only
    # ever sees the selected card, addressed as device 0.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpuid)
    import cntk
    cntk.try_set_default_device(cntk.gpu(0))
    cntk.use_default_device()
    self.model_fn = model_fn
    self.model = cntk.load_model(self.model_fn)
def set_device(device):
    """Select the CNTK compute device by name.

    Args:
        device: 'CPU' forces the CPU device; 'GPU' or 'CUDA' prefers GPU 0,
            falling back to CNTK's default device if GPU selection fails;
            any other value keeps CNTK's default device selection.
    """
    if device == 'CPU':
        C.try_set_default_device(C.device.cpu())
    elif device in ('GPU', 'CUDA'):
        try:
            C.try_set_default_device(C.device.gpu(0))
        # FIX: was a bare `except:`, which also swallows KeyboardInterrupt /
        # SystemExit; narrow to Exception while keeping the best-effort fallback.
        except Exception:
            C.use_default_device()
    else:
        C.use_default_device()
def set_device(device):
    """Select the CNTK compute device by name.

    Args:
        device: 'CPU' forces the CPU device; 'GPU' or 'CUDA' prefers GPU 0,
            falling back to CNTK's default device if GPU selection fails;
            any other value keeps CNTK's default device selection.
    """
    if device == 'CPU':
        C.try_set_default_device(C.device.cpu())
    elif device in ('GPU', 'CUDA'):
        try:
            C.try_set_default_device(C.device.gpu(0))
        # FIX: was a bare `except:`, which also swallows KeyboardInterrupt /
        # SystemExit; narrow to Exception while keeping the best-effort fallback.
        except Exception:
            C.use_default_device()
    else:
        C.use_default_device()
def test_set_gpu_as_default_device():
    """Each GPU can be set as default non-exclusively, then exclusively locked."""
    # all_devices() always includes the CPU, so length 1 means "no GPUs".
    if len(C.device.all_devices()) == 1:
        return  # FIX: dropped the stray trailing semicolon after `return`
    # this will release any previous held device locks
    C.try_set_default_device(C.cpu(), False)
    for i in range(len(C.device.all_devices()) - 1):
        device = C.gpu(i)
        # Non-exclusive selection must succeed and must not lock the device.
        assert C.try_set_default_device(device, False)
        assert not is_locked(device)
        assert device == C.use_default_device()
        # NOTE(review): nesting inferred from logic — exclusive selection only
        # makes sense (and only succeeds) while the device is still unlocked.
        if not device.is_locked():
            assert not is_locked(device)
            assert C.try_set_default_device(device, True)
            assert device == C.use_default_device()
            assert is_locked(device)
def test_set_gpu_as_default_device():
    """Verify every GPU can become the default device and then be locked."""
    # all_devices() always includes the CPU, so a single entry means no GPU.
    if len(C.device.all_devices()) == 1:
        return
    # this will release any previous held device locks
    C.try_set_default_device(C.cpu(), False)
    gpu_count = len(C.device.all_devices()) - 1
    for gpu_index in range(gpu_count):
        device = C.gpu(gpu_index)
        # Non-exclusive selection succeeds without locking.
        assert C.try_set_default_device(device, False)
        assert not is_locked(device)
        assert device == C.use_default_device()
        if not device.is_locked():
            assert not is_locked(device)
            # Exclusive selection acquires the lock.
            assert C.try_set_default_device(device, True)
            assert device == C.use_default_device()
            assert is_locked(device)
def evaluate(reader, criterion, device=None, minibatch_size=16, max_samples=None):
    """Evaluate `criterion` over minibatches drawn from `reader`.

    Args:
        reader: Minibatch source exposing `streams.features` and `streams.labels`.
        criterion: CNTK criterion function; argument 0 is fed features,
            argument 1 is fed labels.
        device: Compute device; defaults to CNTK's default device.
        minibatch_size: Number of samples requested per minibatch.
        max_samples: Optional cap on the total number of samples evaluated.

    Returns:
        Tuple of (loss, metric) aggregated by the progress printer.
    """
    # process minibatches and perform evaluation
    if not device:
        device = use_default_device()
    evaluator = Evaluator(criterion)
    progress_printer = ProgressPrinter(tag='Evaluation', num_epochs=1)
    samples_evaluated = 0
    while True:
        if max_samples and samples_evaluated >= max_samples:
            break
        # Fetch minibatches until we hit the end
        mb = reader.next_minibatch(minibatch_size)
        if not mb:
            break
        metric = evaluator.test_minibatch(
            {criterion.arguments[0]: mb[reader.streams.features],
             criterion.arguments[1]: mb[reader.streams.labels]},
            device=device)
        # FIX: count the samples actually delivered — the final minibatch may
        # be smaller than minibatch_size, which previously over-counted and
        # could stop short of max_samples' intent.
        num_samples = mb[reader.streams.labels].num_samples
        samples_evaluated += num_samples
        progress_printer.update(0, num_samples, metric)  # log progress
    loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
    return loss, metric
def load_model(model_filename: str):
    """A helper function to load the acoustic model from disc.

    Args:
        model_filename (str): The file path to the acoustic model.

    Returns:
        The loaded CNTK model restricted to its log-likelihood output,
        converted for CPU execution when necessary.
    """
    # FIX: removed leftover debug print() calls that dumped the model graph to
    # stdout at every load; the method variant of this helper elsewhere in the
    # project performs the identical steps without them.
    cntk_model = cntk.load_model(model_filename)
    # First try and find output by name
    model_output = cntk_model.find_by_name('ScaledLogLikelihood')
    # Fall back to first defined output
    if model_output is None:
        model_output = cntk_model.outputs[0]
    # Create an object restricted to the desired output.
    cntk_model = cntk.combine(model_output)
    # Optimized RNN models won't run on CPU without conversion.
    # Device type 0 is the CPU device in CNTK.
    if cntk.use_default_device().type() == 0:
        cntk_model = cntk.misc.convert_optimized_rnnstack(cntk_model)
    return cntk_model
def printDeviceType(boGpuRequired=False):
    """Report whether CNTK will use the GPU or the CPU.

    Args:
        boGpuRequired: When True, raise if only the CPU is available.

    Raises:
        Exception: If boGpuRequired is True and no GPU device is in use.
    """
    # Device type 0 denotes the CPU in CNTK.
    on_gpu = use_default_device().type() != 0
    if on_gpu:
        print("Using GPU for CNTK training/scoring.")
        return
    print("WARNING: using CPU for CNTK training/scoring.")
    if boGpuRequired:
        raise Exception("Cannot find GPU or GPU is already locked.")
def _use_default_device(queue):
    # use_default_device needs to be tested in isolation
    # in a freshly created process environment.
    device = C.use_default_device()
    # Expectation: a GPU default is locked, any other default is not.
    on_gpu = device.type() == C.device.DeviceKind.GPU
    queue.put(is_locked(device) if on_gpu else not is_locked(device))
def _use_default_device(queue):
    """Probe the default device's lock state from a fresh process.

    Puts True on `queue` when the lock state matches expectation:
    GPU defaults should be locked, non-GPU defaults should not be.
    (use_default_device needs to be tested in isolation in a freshly
    created process environment.)
    """
    device = C.use_default_device()
    if device.type() == C.device.DeviceKind.GPU:
        expectation_met = is_locked(device)
    else:
        expectation_met = not is_locked(device)
    queue.put(expectation_met)
def test_set_cpu_as_default_device():
    """The CPU can be made the default device but can never be locked."""
    device = C.cpu()
    assert not is_locked(device)
    # Exclusive (locked) selection of the CPU must be refused.
    assert not C.try_set_default_device(device, True)
    assert not is_locked(device)
    # Non-exclusive selection succeeds, with or without the explicit flag.
    assert C.try_set_default_device(device)
    assert C.try_set_default_device(device, False)
    assert not is_locked(device)
    assert device == C.use_default_device()
def test_set_cpu_as_default_device():
    """Setting the CPU as default works only without acquiring a device lock."""
    cpu_device = C.cpu()
    assert not is_locked(cpu_device)
    assert not C.try_set_default_device(cpu_device, True)  # CPU cannot be locked
    assert not is_locked(cpu_device)
    assert C.try_set_default_device(cpu_device)
    assert C.try_set_default_device(cpu_device, False)
    assert not is_locked(cpu_device)
    assert cpu_device == C.use_default_device()
def load_model(self, model_filename):
    """Load an acoustic model from disk and prepare it for evaluation.

    Args:
        model_filename: Path of the serialized CNTK model.

    Returns:
        self, so calls can be chained.
    """
    self.model_filename = model_filename
    raw_model = cntk.load_model(model_filename)
    # Prefer the named scaled log-likelihood output; fall back to the
    # model's first declared output if the name is absent.
    output_node = raw_model.find_by_name('ScaledLogLikelihood')
    if output_node is None:
        output_node = raw_model.outputs[0]
    # Restrict the model to the single desired output.
    prepared = cntk.combine(output_node)
    # Optimized RNN stacks won't run on CPU (device type 0) without conversion.
    if cntk.use_default_device().type() == 0:
        prepared = cntk.misc.convert_optimized_rnnstack(prepared)
    self.model = prepared
    return self
# Copyright (c) 2018 Wang XX # # MIT License # http://www.opensource.org/licenses/mit-license.php # import cntk as C from cntk.initializer import xavier, glorot_uniform, normal from cntk.ops.functions import UserFunction from cntk.logging import ProgressPrinter import numpy as np import argparse print(C.device.all_devices()) try: C.device.try_set_default_device(C.device.gpu(0)) C.use_default_device() except: C.device.try_set_default_device(C.device.cpu()) C.use_default_device() class IndRNNUnit(object): def __init__(self, hidden_dim, input_size, recurrent_min_abs=None, recurrent_max_abs=None, recurrent_kernel_initializer=1.0, input_kernel_initializer=normal(0.01), activation=C.relu, name=None):
def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size, max_epochs, cntk_lr_per_image, l2_reg_weight, momentum_time_constant, base_path, boSkipTraining=False, debug_output=False, tensorboardLogDir=None):
    """Build and train a Fast R-CNN model, returning the trained output node.

    Args:
        image_height, image_width: Input image dimensions (3 channels assumed).
        num_classes: Number of object classes (including background).
        num_rois: Number of regions of interest per image.
        mb_size: Minibatch size in samples.
        max_epochs: Number of training epochs.
        cntk_lr_per_image: Per-image learning-rate schedule (list of floats);
            scaled down by num_rois to a per-sample rate below.
        l2_reg_weight: L2 regularization weight for the learner.
        momentum_time_constant: Momentum expressed as a time constant.
        base_path: Root path for the pre-trained base model and data files.
        boSkipTraining: If True, return the untrained penultimate layer of the
            pre-trained network instead of training.
        debug_output: If True, plot the graph and save a model per epoch.
        tensorboardLogDir: Optional TensorBoard log directory; enables
            TensorBoard progress and per-parameter statistics logging.

    Returns:
        The trained network output node, or the penultimate layer when
        boSkipTraining is True.
    """
    # make sure we use GPU for training (device type 0 is the CPU in CNTK)
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(
        image_input, roi_input, num_classes, base_path)
    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and define mapping from reader streams to network inputs
    minibatch_source, epoch_size = create_mb_source("train", image_height,
                                                    image_width, num_classes,
                                                    num_rois, base_path,
                                                    randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # set loss / error functions (axis=1 -> per-ROI softmax over classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # set the progress printer(s)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if tensorboardLogDir != None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object.
    # The per-image learning rates are converted to per-sample rates, since
    # each ROI counts as one sample.
    lr_per_sample = [f / float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, 
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0
        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            # Never request more samples than remain in the epoch.
            data = minibatch_source.next_minibatch(min(
                mb_size, epoch_size - sample_count), input_map=input_map)
            if sample_count % 100 == 1:
                print(
                    "Training in progress: epoch {} of {}, sample count {} of {}"
                    .format(epoch, max_epochs, sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboardLogDir != None:
            for parameter in frcn_output.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               np.mean(parameter.value),
                                               epoch)
                tensorboard_writer.write_value(parameter.uid + "/std",
                                               np.std(parameter.value),
                                               epoch)
                tensorboard_writer.write_value(parameter.uid + "/absSum",
                                               np.sum(np.abs(parameter.value)),
                                               epoch)
        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))
    return frcn_output
def load_cnn_model(fn, gpu_id=0):
    """Load a serialized CNTK model after selecting the requested GPU.

    Args:
        fn: Path to the serialized CNTK model file.
        gpu_id: Index of the GPU to prefer as default device (default 0).

    Returns:
        The deserialized CNTK model.
    """
    # Try to make the requested GPU the default device before deserializing.
    cntk.try_set_default_device(cntk.gpu(gpu_id))
    cntk.use_default_device()
    return cntk.load_model(fn)