def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and in the clone substitutes all Functions
    obtained by applying 'filter' with a new Function obtained by calling the specified 'converter'.

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda for filtering out the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted

    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
        block = blocks1[i] # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
    for function_to_convert in functions_to_convert:
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output : converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func
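# --- Usage sketch for convert() (not from the original source; the tiny model and op names
# below are assumptions for illustration). The filter matches ReLU primitives and the converter
# rebuilds a substitute Function from the matched node's own input.
import cntk as C

x = C.input_variable(4)
z = C.relu(C.layers.Dense(8, name='proj')(x))

is_relu  = lambda f: isinstance(f, C.Function) and f.op_name == 'ReLU'
to_leaky = lambda f: C.leaky_relu(f.inputs[0])   # converter returns the substitute Function
z_leaky  = convert(z, is_relu, to_leaky)         # cloned graph with ReLUs replaced by leaky_relu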
def input_layer(self, cgw, cnw, cc, qgw, qnw, qc):
    cgw_ph = C.placeholder()
    cnw_ph = C.placeholder()
    cc_ph  = C.placeholder()
    qgw_ph = C.placeholder()
    qnw_ph = C.placeholder()
    qc_ph  = C.placeholder()

    input_chars = C.placeholder(shape=(1, self.word_size, self.c_dim))
    input_glove_words = C.placeholder(shape=(self.wg_dim,))
    input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

    # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
    # todo GlobalPooling/reduce_max should have a keepdims default to False
    embedded = C.splice(
        C.reshape(self.charcnn(input_chars), self.convs),
        self.embed()(input_glove_words, input_nonglove_words), name='splice_embed')

    processed = C.layers.Sequential([For(range(2), lambda: OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='input_rnn'))])(embedded)

    qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
    cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)

    q_processed = processed.clone(C.CloneMethod.share, {input_chars: qce, input_glove_words: qgw_ph, input_nonglove_words: qnw_ph})
    c_processed = processed.clone(C.CloneMethod.share, {input_chars: cce, input_glove_words: cgw_ph, input_nonglove_words: cnw_ph})

    return C.as_block(
        C.combine([c_processed, q_processed]),
        [(cgw_ph, cgw), (cnw_ph, cnw), (cc_ph, cc), (qgw_ph, qgw), (qnw_ph, qnw), (qc_ph, qc)],
        'input_layer',
        'input_layer')
def _Identity(name='identity_arg'):
    x = Placeholder(name=name)
    apply_x = combine([x])
    # TODO: Let's not encourage users to use combine([f]) as a workaround for identity/pass,
    # but rather have it as a first-class operator implemented that we then use. [Willi]
    #apply_x = alias(x) # TODO: does not work. Should it?
    #_name_and_extend_Function(apply_x, 'Identity')
    return Block(apply_x, 'Identity')
def model(self):
    c = C.Axis.new_unique_dynamic_axis('c')
    q = C.Axis.new_unique_dynamic_axis('q')
    b = C.Axis.default_batch_axis()
    cgw = C.input_variable(self.wg_dim, dynamic_axes=[b,c], is_sparse=self.use_sparse, name='cgw')
    cnw = C.input_variable(self.wn_dim, dynamic_axes=[b,c], is_sparse=self.use_sparse, name='cnw')
    qgw = C.input_variable(self.wg_dim, dynamic_axes=[b,q], is_sparse=self.use_sparse, name='qgw')
    qnw = C.input_variable(self.wn_dim, dynamic_axes=[b,q], is_sparse=self.use_sparse, name='qnw')
    cc = C.input_variable((1,self.word_size), dynamic_axes=[b,c], name='cc')
    qc = C.input_variable((1,self.word_size), dynamic_axes=[b,q], name='qc')
    ab = C.input_variable(self.a_dim, dynamic_axes=[b,c], name='ab')
    ae = C.input_variable(self.a_dim, dynamic_axes=[b,c], name='ae')

    # input layer
    c_processed, q_processed = self.input_layer(cgw, cnw, cc, qgw, qnw, qc).outputs

    # attention layer
    att_context = self.gated_attention_gru_layer(c_processed, q_processed)

    # self-matching attention layer
    match_context = self.matching_attention_layer(att_context)

    # output layer
    start_logits, end_logits = self.output_layer(q_processed, match_context).outputs

    # loss
    start_loss = seq_loss(start_logits, ab)
    end_loss = seq_loss(end_logits, ae)
    #paper_loss = start_loss + end_loss
    new_loss = all_spans_loss(start_logits, ab, end_logits, ae)

    return C.combine([start_logits, end_logits]), new_loss
def create_faster_rcnn_eval_model(model, image_input, dims_input, cfg, rpn_model=None):
    print("creating eval model")
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
    conv_layers = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name], ["rpn_cls_prob_reshape", "rpn_bbox_pred"], CloneMethod.freeze)
    rpn_out = rpn(conv_out)
    # we need to add the proposal layer anew to account for changing configs when buffering proposals in 4-stage training
    rpn_rois = create_proposal_layer(rpn_out.outputs[0], rpn_out.outputs[1], dims_input, cfg)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])

    return eval_model
def calculate_loss_vector(network, path, location_path, communicator):
    source = DataSource(path, opt.vocab_file, location_path, opt.seqlength, opt.batchsize)
    # the curr row -> the curr col
    # the curr col -> the next row
    row_loss = C.log(C.softmax(network['model'].outputs[0]))
    col_loss = C.log(C.softmax(network['model'].outputs[1]))
    loss = C.combine([row_loss, col_loss])
    row_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt))
    col_loss_vector = np.zeros((opt.vocabsize, vocab_sqrt))

    flag = True
    while flag:
        mb = source.next_minibatch(opt.seqlength * opt.batchsize * Communicator.num_workers(),
                                   Communicator.num_workers(), communicator.rank())
        result = loss.eval({
            network['row']: mb[source.input1],
            network['col']: mb[source.input2],
        })
        row_prob = result[loss.outputs[0]]
        col_prob = result[loss.outputs[1]]
        label1 = mb[source.word1].asarray()
        label2 = mb[source.word2].asarray()
        sequences = len(label1)
        for i in range(sequences):
            seqlength = len(row_prob[i])
            for j in range(seqlength):
                row_word = int(label1[i][j][0])
                col_word = int(label2[i][j][0])
                row_loss_vector[row_word] -= row_prob[i][j]
                col_loss_vector[col_word] -= col_prob[i][j]
        flag = not mb[source.input1].sweep_end

    return col_loss_vector, row_loss_vector
def clone(self, method, substitutions=None):
    '''
    Clones the function. The parameters of the Function are either cloned, shared or frozen as
    specified by the method argument and any variable substitutions requested are applied in the
    cloned Function instance.

    Args:
        method (:class:`CloneMethod`): one of

         * 'clone': the returned function gets its own copy of parameters (default)
         * 'share': the returned function shares its parameters with this function
         * 'freeze': parameters are cloned and made immutable (constant).

        substitutions (dict): a dictionary mapping variables in this function to variables in the cloned function

    Returns:
        :class:`~cntk.ops.functions.Function`: the cloned Function
    '''
    # C++ clone() can only clone composites. If we are not a composite, make it one using combine()
    if not self.is_composite:
        from cntk import combine
        #return combine([self]).clone(method, substitutions).root_function.arguments[0].owner
        # BUGBUG: This ^^ does not give me the correct .arguments, so we leave the extra combine() in for now.
        return combine([self]).clone(method, substitutions)

    method = getattr(cntk_py, 'ParameterCloningMethod_' + CloneMethod(method).name.capitalize())
    substitutions = substitutions or {}
    if not isinstance(substitutions, dict):
        raise TypeError("Variable substitution map must be a dictionary")
    return super(Function, self).clone(method, substitutions)
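# --- Usage sketch for clone() (not from the original source; the tiny model is an assumption).
# 'share' reuses the same parameter objects, 'freeze' turns them into constants, and the
# substitution map re-wires the cloned graph onto a different input.
import cntk as C

x = C.input_variable(4)
dense = C.layers.Dense(2)(x)

shared = dense.clone(C.CloneMethod.share)    # shares the same Parameter objects as `dense`
frozen = dense.clone(C.CloneMethod.freeze)   # parameters are copied and become Constants
assert {p.uid for p in shared.parameters} == {p.uid for p in dense.parameters}

y = C.input_variable(4)
rewired = dense.clone(C.CloneMethod.share, {x: y})   # same weights, applied to a different input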
def test_block_with_unused_outputs():
    p1 = C.placeholder()
    p3 = C.placeholder()
    func1 = C.as_block(p1 + 1, [(p1, p3)], 'plus_func_1')
    p2 = C.placeholder()
    p4 = C.placeholder()
    func2 = C.as_block(p2 + 1, [(p2, p4)], 'plus_func_2')
    p5 = C.placeholder()
    func3 = C.as_block(C.combine([func2]), [(p4, p5)], 'empty_block')
    input_var1 = C.input_variable(shape=())
    input_var2 = C.input_variable(shape=())
    block = C.as_block(C.combine([func1, func3]), [(p3, input_var1), (p5, input_var2)], 'multi_output_block')

    eval_root = C.combine([block.outputs[0]])
    result = eval_root.eval({input_var1 : np.asarray([3], dtype=np.float32), input_var2 : np.asarray([-3], dtype=np.float32)})
    assert np.array_equal(result, [ 4.])
def identity(keep):
    '''
    Identity function.
    There is no factory for it because there is only one identity function.
    '''
    # Note: We cannot use alias() here since parameter-shape inference cannot be done through alias().
    return combine([keep])
def gru_cell(shape, init=init_default_or_glorot_uniform, name=''): # (x, (h,c))
    shape = _as_tuple(shape)

    if len(shape) != 1:
        raise ValueError("gru_cell: shape must be vectors (rank-1 tensors)")

    # determine stacking dimensions
    cell_shape_stacked = shape * 2  # patched dims with stack_axis duplicated 2 times

    # parameters
    Wz = Parameter(cell_shape_stacked, init=init, name='Wz')
    Wr = Parameter(cell_shape_stacked, init=init, name='Wr')
    Wh = Parameter(cell_shape_stacked, init=init, name='Wh')
    Uz = Parameter(_INFERRED + shape, init=init, name='Uz')
    Ur = Parameter(_INFERRED + shape, init=init, name='Ur')
    Uh = Parameter(_INFERRED + shape, init=init, name='Uh')

    def create_s_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        return Placeholder(shape=shape, name='S') # (h, c)

    # parameters to model function
    x = Placeholder(name='gru_block_arg')
    prev_status = create_s_placeholder()

    # formula of model function
    Sn_1 = prev_status

    z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'), name='z')
    r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'), name='r')
    h = tanh(times(x, Uh, name='x*Uh') + times(element_times(Sn_1, r, name='Sprev*r'), Wh), name='h')
    s = plus(element_times((1-z), h, name='(1-z)*h'), element_times(z, Sn_1, name='z*SPrev'), name=name)
    apply_x_s = combine([s])
    apply_x_s.create_placeholder = create_s_placeholder
    return apply_x_s
def create_model(input_dim):
    row = sequence.input_variable(shape=input_dim)
    col = sequence.input_variable(shape=input_dim)
    rowh = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])(row)
    colh = Sequential([Embedding(opt.embed), Stabilizer(), Dropout(opt.dropout)])(col)

    x = C.splice(rowh, colh, axis=-1)
    x = lightlstm(opt.embed, opt.nhid)(x)
    x = For(range(opt.layer-1), lambda: lightlstm(opt.nhid, opt.nhid))(x)
    rowh = C.slice(x, -1, opt.nhid * 0, opt.nhid * 1)
    colh = C.slice(x, -1, opt.nhid * 1, opt.nhid * 2)

    row_predict = Sequential([Dropout(opt.dropout), Dense(input_dim)])(rowh)
    col_predict = Sequential([Dropout(opt.dropout), Dense(input_dim)])(colh)

    # variable : row label and col label
    row_label = sequence.input_variable(shape=input_dim)
    col_label = sequence.input_variable(shape=input_dim)

    model = C.combine([row_predict, col_predict])

    return {'row':       row,
            'col':       col,
            'row_label': row_label,
            'col_label': col_label,
            'model':     model}
def test_constant_eval():
    c = C.Constant(value=1)
    c_plus_1 = c + 1
    op = C.combine([c_plus_1, c])
    result = op.eval({})
    assert np.array_equal(result[c_plus_1.output], 2.0)
    assert np.array_equal(result[c], 1.0)
def test_output_subset_evaluation(device_id):
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')
    device = cntk_device(device_id)
    x1 = C.input_variable(shape=())
    op1 = C.constant(value=1, shape=(1), device=device) + (C.constant(value=1, shape=(1), device=device) + x1)

    x2 = C.input_variable(shape=(1))

    # Deliberately locate the parameter on a different device
    # instead of the actual compute target device, so that
    # if we try to use this parameter, it results in an error
    if (device.type() == 0):
        parameter_device = gpu_device
    else:
        parameter_device = C.cpu()
    p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device)
    op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p

    op = C.combine([op1, op2])

    _, result = op.forward({x1 : np.asarray([1, 2, 3])}, [op1], device=device)
    assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
def test_eval_again_with_prev_outputs_live(device_id):
    x = C.input_variable(2)
    dev = cntk_device(device_id)
    w1 = C.parameter(init=np.asarray([1], dtype=np.float32), device=dev)
    w2 = C.parameter(init=np.asarray([-1], dtype=np.float32), device=dev)
    out1 = x + w1
    out2 = x + w2
    op = C.combine([out1, out2])

    result1 = op.eval({x : np.asarray([2, 5], dtype=np.float32)}, device=dev)
    assert np.array_equal(result1[out1.output], [[3, 6]])
    assert np.array_equal(result1[out2.output], [[1, 4]])

    result2 = op.eval({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev)
    assert np.array_equal(result2[out1.output], [[0, 5], [-3, 8]])
    assert np.array_equal(result2[out2.output], [[-2, 3], [-5, 6]])

    # result1 should still be valid
    assert np.array_equal(result1[out1.output], [[3, 6]])
    assert np.array_equal(result1[out2.output], [[1, 4]])

    result1 = op.eval({x : np.asarray([2, 5], dtype=np.float32)}, device=dev, as_numpy=False)
    assert np.array_equal(result1[out1.output].asarray(), [[3, 6]])
    assert np.array_equal(result1[out2.output].asarray(), [[1, 4]])

    result2 = op.eval({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, device=dev, as_numpy=False)
    assert np.array_equal(result2[out1.output].asarray(), [[0, 5], [-3, 8]])
    assert np.array_equal(result2[out2.output].asarray(), [[-2, 3], [-5, 6]])

    # Accessing result1 now will cause an error since it was a temporary that
    # is now erased, due to the subsequent eval call
    with pytest.raises(RuntimeError):
        assert np.array_equal(result1[out1.output].asarray(), [[3, 6]])

    grad_op = out1 + out2
    grad1 = grad_op.grad({x : np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev)
    assert np.array_equal(grad1[w1], [2])
    assert np.array_equal(grad1[w2], [2])

    grad2 = grad_op.grad({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev)
    assert np.array_equal(grad2[w1], [4])
    assert np.array_equal(grad2[w2], [4])

    # grad1 should still be valid
    assert np.array_equal(grad1[w1], [2])
    assert np.array_equal(grad1[w2], [2])

    grad1 = grad_op.grad({x : np.asarray([2, 5], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False)
    assert np.array_equal(grad1[w1].asarray(), [2])
    assert np.array_equal(grad1[w2].asarray(), [2])

    grad2 = grad_op.grad({x : np.asarray([[-1, 4], [-4, 7]], dtype=np.float32)}, wrt=[w1, w2], device=dev, as_numpy=False)
    assert np.array_equal(grad2[w1].asarray(), [4])
    assert np.array_equal(grad2[w2].asarray(), [4])

    # Accessing grad1 now will cause an error since it was a temporary that
    # is now erased, due to the subsequent grad call
    with pytest.raises(RuntimeError):
        assert np.array_equal(grad1[w1].asarray(), [2])
def test_grad_custimized_root():
    x = C.input_variable(shape=(1,), needs_gradient=True)
    y = C.sqrt(x)
    y2 = C.log(x)
    combine = C.combine([y.output, y2.output])
    a = np.asarray([1, 4, 16], dtype=np.float32).reshape(3, 1)
    grads = combine.grad({x: a}, grad_root=y.output)
    expect_grad = np.asarray([[0.5], [0.25], [0.125]], dtype=np.float32)
    assert np.array_equal(grads, expect_grad)
def test_universal():
    np.random.seed(98052)
    builtin_sgd = lambda params: sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch))
    builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd)
    np.random.seed(98052)
    my_sgd = lambda ps, gs: C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)])
    universal_sgd = lambda params: universal(my_sgd, params)
    my_last_avg_error, my_avg_error, _ = ffnet(universal_sgd)
    assert np.all(np.less_equal(my_last_avg_error, builtin_last_avg_error))
    assert np.all(np.less_equal(my_avg_error, builtin_avg_error))
def _inject_name(f, name):
    '''
    Call this at the end of any layer or block that takes an optional name argument.
    '''
    if name:
        if len(f.outputs) == 1:
            f = alias(f, name=name)
        else:
            f = combine(list(f.outputs), name=name) # BUGBUG: Does this actually name things?
    return f
def test_clone_with_unfound_previous_node():
    x = C.input_variable(())
    y = C.combine(x * x, x + x)
    y0 = y[0]
    y1 = y[1]
    y0_new = C.plus(y0, 0, name="test")
    X = C.logging.find_by_name(y0_new, 'QueryReply_y')
    with pytest.raises(AttributeError):
        y_clone = y.clone(C.CloneMethod.share, {X: y0_new})
def _get_loss_metric(criterion): # helper to interpret criterion parameter
    if isinstance(criterion, cntk_py.Function): # input can be a tuple of Functions or a tuple-valued Function
        criterion = criterion.outputs           # break up tuple-valued Function into tuple of Functions
    # map Variable to Function
    from cntk import combine
    criterion = tuple([combine([output], name=output.name) if isinstance(output, cntk_py.Variable) else output for output in criterion])
    if len(criterion) == 1:
        criterion = criterion + (None,) # tuple of 1 value: pad with None
    elif len(criterion) != 2:
        raise ValueError("criterion parameter must be a singleton or a tuple of 2 elements")
    return criterion
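# --- Illustration of what _get_loss_metric() accepts (not from the original source; the model
# `z` and labels `y` are assumptions). Both a tuple-valued Function and a bare loss normalize
# to a (loss, metric) pair of Functions.
import cntk as C

ce = C.cross_entropy_with_softmax(z, y)
pe = C.classification_error(z, y)

loss, metric = _get_loss_metric(C.combine([ce, pe]))   # tuple-valued Function -> (loss, metric)
loss, metric = _get_loss_metric((ce,))                 # singleton             -> (ce, None)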
def debug_attention(model, input):
    q = C.combine([model, model.attention_model.attention_weights])
    #words, p = q(input) # Python 3
    words_p = q(input)
    words = words_p[0]
    p     = words_p[1]
    output_seq_len = words[0].shape[0]
    p_sq = np.squeeze(p[0][:output_seq_len,:,:]) # (batch, output_len, input_len, 1)
    opts = np.get_printoptions()
    np.set_printoptions(precision=5)
    print(p_sq)
    np.set_printoptions(**opts)
def clone_model(base_model, from_node_names, to_node_names, clone_method):
    from_nodes = [find_by_name(base_model, node_name) for node_name in from_node_names]
    if None in from_nodes:
        print("Error: could not find all specified 'from_nodes' in clone. Looking for {}, found {}"
              .format(from_node_names, from_nodes))
    to_nodes = [find_by_name(base_model, node_name) for node_name in to_node_names]
    if None in to_nodes:
        print("Error: could not find all specified 'to_nodes' in clone. Looking for {}, found {}"
              .format(to_node_names, to_nodes))

    input_placeholders = dict(zip(from_nodes, [placeholder() for x in from_nodes]))
    cloned_net = combine(to_nodes).clone(clone_method, input_placeholders)
    return cloned_net
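# --- Usage sketch for clone_model() (not from the original source; `base_model`, the node
# names, and the input shape are assumptions and must exist in the loaded network).
# This cuts out a frozen convolutional trunk, as the eval-model builders in this collection do.
conv_layers = clone_model(base_model, ["data"], ["conv5.y"], CloneMethod.freeze)
image_input = input_variable((3, 600, 800))
conv_out = conv_layers(image_input)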
def test_clone_with_wrong_type_node():
    x = C.input_variable(())
    y = C.combine(x * x, x + x)
    y0 = y[0]
    y1 = y[1]
    y0_new = C.plus(y0, 0, name="test")
    X = C.logging.find_by_name(y0_new, 'QueryReply_y')
    a = 5
    with pytest.raises(TypeError):
        y_clone = y.clone(C.CloneMethod.share, {y0: a})
def output_layer(self, query, match_context):
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))
    mat_context = C.placeholder(shape=(2*self.hidden_dim,))

    #output layer
    r_q = question_pooling(q_processed, 2*self.hidden_dim) #shape n*(2*self.hidden_dim)
    p1_logits = attention_weight(mat_context, r_q, 2*self.hidden_dim)
    attention_pool = C.sequence.reduce_sum(p1_logits * mat_context)
    state = C.layers.GRU(2*self.hidden_dim)(attention_pool, r_q)
    p2_logits = attention_weight(mat_context, state, 2*self.hidden_dim)

    @C.Function
    def start_ave_point(p1_logits, p2_logits, point):
        @C.Function
        def start_ave(last, now):
            now = now + last - last
            new_start = now * C.sequence.gather(p2_logits, point)
            point = C.sequence.future_value(point)
            return new_start
        start_logits_ave = C.layers.Recurrence(start_ave)(p1_logits)
        return start_logits_ave

    point = C.sequence.is_first(p1_logits)
    point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus))])(point)
    point = C.greater(C.constant(16), point)
    start_logits_ave = start_ave_point(p1_logits, p2_logits, point)

    @C.Function
    def end_ave_point(p1_logits, p2_logits, point):
        @C.Function
        def end_ave(last, now):
            now = now + last - last
            new_end = now * C.sequence.gather(p2_logits, point)
            point = C.sequence.past_value(point)
            return new_end
        end_logits_ave = C.layers.Recurrence(end_ave, go_backwards=True)(p2_logits)
        return end_logits_ave

    point = C.sequence.is_last(p1_logits)
    point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus, go_backwards=True))])(point)
    point = C.greater(C.constant(16), point)
    end_logits_ave = end_ave_point(p1_logits, p2_logits, point)

    start_logits = seq_hardmax(start_logits_ave)
    end_logits = seq_hardmax(end_logits_ave)
    '''
    start_logits = seq_hardmax(p1_logits)
    end_logits = seq_hardmax(p2_logits)
    '''

    return C.as_block(
        C.combine([start_logits, end_logits]),
        [(q_processed, query), (mat_context, match_context)],
        'output_layer',
        'output_layer')
def debug_attention(model, input):
    q = combine([model, model.attention_model.attention_weights])
    #words, p = q(input) # Python 3
    words_p = q(input)
    words = words_p[0]
    p     = words_p[1]
    seq_len = words[0].shape[attention_axis-1]
    span = 7 #attention_span  #7 # test sentence is 7 tokens long
    p_sq = np.squeeze(p[0][:seq_len,:span,0,:]) # (batch, len, attention_span, 1, vector_dim)
    opts = np.get_printoptions()
    np.set_printoptions(precision=5)
    print(p_sq)
    np.set_printoptions(**opts)
def test_debug_multi_output():
    input_dim = 2
    num_output_classes = 2

    f_input = input_variable(input_dim, np.float32, needs_gradient=True, name='features')
    p = parameter(shape=(input_dim,), init=10, name='p')

    comb = combine([f_input, p])

    ins = InStream(['n', 'n', 'n', 'n', 'n'])
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input_variable(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error, loss.find_by_name('features'), loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward  ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'
    assert len(outs.written) == 8

    v_p = "Parameter('p', "
    v_i = "Input('features'"
    v_t = 'Times: '

    assert outs.written[0].startswith('=') and 'forward' in outs.written[0]
    line_1, line_2, line_3 = outs.written[1:4]

    assert outs.written[4].startswith('=') and 'backward' in outs.written[4]
    line_5, line_6, line_7 = outs.written[5:8]
    assert line_5.startswith(v_t)
    assert line_6.startswith(v_p) and line_7.startswith(v_i) or \
           line_6.startswith(v_i) and line_7.startswith(v_p)
def test_eval_not_all_outputs():
    x = C.input_variable(1)
    x_data = [AA([3], dtype=np.float32)]
    y = C.input_variable(1)
    y_data = [AA([2], dtype=np.float32)]
    plus_func = x + 1
    minus_func = y - 1
    func = combine([plus_func, minus_func])

    result = func.eval({x : x_data}, [plus_func])
    assert np.array_equal(result, np.asarray([[4.]]))

    result = func.eval({y : y_data}, [minus_func])
    assert np.array_equal(result, np.asarray([[1.]]))
def test_assign_dependency(input_data, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    data = AA(input_data, dtype=dt)

    value = C.parameter(init=data)
    dest = C.parameter(shape=data.shape, dtype=dt)
    assign_op = C.assign(dest, value)
    y = dest + value

    result = C.combine([y, assign_op]).eval()

    assert np.array_equal(result[y.output], data)
    assert np.array_equal(dest.asarray(), data)
    assert np.array_equal(y.eval(), data + data)
def _simple_dict():
    d = {}

    d['i1'] = C.input_variable(shape=(2, 3), name='i1')
    d['c1'] = C.constant(shape=(2, 3), value=6, name='c1')
    d['p1'] = C.parameter(shape=(3, 2), init=7, name='p1')
    d['op1'] = C.plus(d['i1'], d['c1'], name='op1')
    d['op2'] = C.times(d['op1'], d['p1'], name='op2')
    d['root'] = d['op2']

    d['target'] = C.input_variable((), name='label')
    d['all'] = C.combine([d['root'], C.minus(
        d['target'], C.constant(1, name='c2'), name='minus')], name='all')

    return d
def test_normal_diff_along_batch(arg0, arg1, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    N = 1000
    B = 10.0 / np.sqrt(N)
    x  = C.sequence.input_variable(1, dtype=dt)
    x0 = np.zeros((N, 2, 1), dtype=dt)
    z = cr.normal_like(x, arg0, arg1, seed=98052)
    diff = C.sequence.first(z) - C.sequence.last(z)
    mean = C.reduce_mean(diff, axis=C.Axis.all_axes())
    var  = C.reduce_mean(diff*diff, axis=C.Axis.all_axes())
    expr = C.combine([mean, var])
    values = expr.eval({x: x0}, device=dev)

    assert np.abs(values[mean.output]) < B
    assert np.abs(values[var.output] - 2*arg1*arg1) < np.sqrt(2)*arg1*B
def eval_and_write(model_file, node_name, output_file, minibatch_source, num_objects):
    # load model and pick desired node as output
    loaded_model  = load_model(model_file)
    node_in_graph = loaded_model.find_by_name(node_name)
    output_nodes  = combine([node_in_graph.owner])

    # evaluate model and get desired node output
    print("Evaluating model for output node %s" % node_name)
    features_si = minibatch_source['features']
    with open(output_file, 'wb') as results_file:
        for i in range(0, num_objects):
            mb = minibatch_source.next_minibatch(1)
            output = output_nodes.eval(mb[features_si])

            # write results to file
            out_values = output[0].flatten()
            np.savetxt(results_file, out_values[np.newaxis], fmt="%.6f")
def test_evaluating_multiple_outputs():
    input_data = AA([1], np.float32)

    a = C.input_variable(shape=input_data.shape, name='a')
    a_plus_1 = a + 1
    out1 = ((a_plus_1 + 2) - 1) + 1
    out2 = ((a_plus_1 + 4) - 1) + 2
    z = C.combine([out1, out2])

    # create batch
    input_data.shape = (1, 1) + input_data.shape

    res = z.eval({a: input_data})

    expected_forward_out1 = [[4.]]
    expected_forward_out2 = [[7.]]
    assert np.array_equal(res[out1.output], expected_forward_out1)
    assert np.array_equal(res[out2.output], expected_forward_out2)
def init():
    """ Initialise ResNet 152 model
    """
    global trainedModel, labelLookup, mem_after_init

    start = t.default_timer()

    # Load the model and labels from disk
    with open(LABEL_FILE, 'r') as f:
        labelLookup = [l.rstrip() for l in f]

    # Load model and load the model from brainscript (3rd index)
    trainedModel = load_model(MODEL_FILE)
    trainedModel = combine([trainedModel.outputs[2].owner])
    end = t.default_timer()

    loadTimeMsg = "Model loading time: {0} ms".format(round((end - start) * 1000, 2))
    logger.info(loadTimeMsg)
def _get_loss_metric(criterion): # helper to interpret criterion parameter
    if isinstance(criterion, cntk_py.Function): # input can be a tuple of Functions or a tuple-valued Function
        criterion = criterion.outputs           # break up tuple-valued Function into tuple of Functions
    # map Variable to Function
    from cntk import combine
    criterion = tuple([combine([output], output.name) if isinstance(output, cntk_py.Variable) else output for output in criterion])
    if len(criterion) == 1:
        criterion = criterion + (None,) # tuple of 1 value: pad with None
    elif len(criterion) != 2:
        raise ValueError("criterion parameter must be a singleton or a tuple of 2 elements")
    return criterion
def build_model(self):
    phmap = self.get_inputs()
    cc = phmap['cc']
    qc = phmap['qc']
    ab = phmap['ab']
    ae = phmap['ae']
    df = phmap['df']
    qf = phmap['qf']
    #self.info['query'] = C.splice(qgw, qnw)
    #self.info['doc'] = C.splice(cgw, gnw)
    elmo_encoder = self.__elmo_fac.build()

    #input layer
    reduction_cc = C.reshape(cc, (-1,))
    reduction_qc = C.reshape(qc, (-1,))
    c_elmo = elmo_encoder(reduction_cc)
    q_elmo = elmo_encoder(reduction_qc)
    c_processed, q_processed = self.input_layer(phmap['cgw'], phmap['cnw'], phmap['qgw'], phmap['qnw']).outputs

    # attention layer
    c_enhance = C.splice(c_processed, c_elmo, df)
    q_enhance = C.splice(q_processed, q_elmo, qf)
    att_context, wei = self.attention_layer(c_enhance, q_enhance,
                                            dimc=2*self.hidden_dim+1027, dimq=2*self.hidden_dim+1025,
                                            common_dim=2*self.hidden_dim+1024).outputs
    self_context = self.self_attention_layer(att_context) # 2*hidden_dim

    # modeling layer
    mod_context = self.modeling_layer(self_context)
    enhance_mod_context = C.splice(mod_context, c_elmo, df)

    # output layer
    start_logits, end_logits = self.output_layer(att_context, enhance_mod_context).outputs

    # loss
    start_loss = seq_loss(start_logits, ab)
    end_loss = seq_loss(end_logits, ae)
    regulizer = 0.001 * C.reduce_sum(elmo_encoder.scales * elmo_encoder.scales)
    new_loss = all_spans_loss(start_logits, ab, end_logits, ae) + regulizer
    self._model = C.combine([start_logits, end_logits])
    self._loss = new_loss
    return self._model, self._loss, self._input_phs
def attention_layer(self, context, query, dimc, dimq, common_dim):
    q_processed = C.placeholder(shape=(dimq,))
    c_processed = C.placeholder(shape=(dimc,))

    #convert query's sequence axis to static
    qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

    # so W * [h; u; h.* u] becomes w1 * h + w2 * u + w4 * (h.*u)
    ws1 = C.parameter(shape=(dimc, 1), init=C.glorot_uniform())
    ws2 = C.parameter(shape=(dimq, 1), init=C.glorot_uniform())
    ws4 = C.parameter(shape=(1, common_dim), init=C.glorot_uniform())
    att_bias = C.parameter(shape=(), init=0)

    wh = C.times(c_processed, ws1) # [#,c][1]
    wu = C.reshape(C.times(qvw, ws2), (-1,)) # [#][*]
    # qvw*ws4: [#][*,200], whu: [#,c][*]
    whu = C.reshape(C.reduce_sum(
        c_processed[:common_dim] *
        C.sequence.broadcast_as(qvw[:, :common_dim] * ws4, c_processed), axis=1), (-1,))
    S1 = wh + C.sequence.broadcast_as(wu, c_processed) + att_bias # [#,c][*]
    qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, c_processed)
    S1 = C.element_select(qvw_mask_expanded, S1, C.constant(-1e+30))
    q_attn = C.reshape(C.softmax(S1), (-1, 1)) # [#,c][*,1]
    c2q = C.reshape(C.reduce_sum(C.sequence.broadcast_as(qvw, q_attn) * q_attn, axis=0), (-1)) # [#,c][200]

    max_col = C.reduce_max(S1) # [#,c][1] score of the most relevant query word
    c_attn = C.sequence.softmax(max_col) # [#,c][1] softmax over every word in the context
    htilde = C.sequence.reduce_sum(c_processed * c_attn) # [#][200]
    q2c = C.sequence.broadcast_as(htilde, c_processed) # [#,c][200]
    q2c_out = c_processed[:common_dim] * q2c[:common_dim]

    # original context, query-aware representation, context-focus representation,
    # match-degree representation, and contextual representation
    att_context_reg = C.splice(c_processed, c2q, q2c_out, c_processed[:common_dim] * c2q[:common_dim])
    res = C.combine(att_context_reg, C.reshape(q_attn, (-1,)))

    return \
        C.as_block(res,
                   [(c_processed, context), (q_processed, query)],
                   'attention_layer',
                   'attention_layer')
def dot_attention(self, inputs, memory, dim):
    '''
    @inputs: [#,c][d] the sequence to attend over
    @memory(key): [#,q][d] the sequence used to compute similarity (attention weights)
    @value: [#,q][d] the sequence used for the weighted sum
    @output: [#,c][d] attention vector
    '''
    input_ph = C.placeholder()
    input_mem = C.placeholder()
    with C.layers.default_options(bias=False, activation=C.relu): # all the projections have no bias
        attn_proj_enc = C.layers.Dense(dim, init=glorot_uniform(), input_rank=1, name="Wqu")
        attn_proj_dec = C.layers.Dense(dim, init=glorot_uniform(), input_rank=1)

    inputs_ = attn_proj_enc(input_ph) # [#,c][d]
    memory_ = attn_proj_dec(input_mem) # [#,q][d]
    unpack_memory, mem_mask = C.sequence.unpack(memory_, 0).outputs # [#][*=q, d], [#][*=q]
    unpack_memory_expand = C.sequence.broadcast_as(unpack_memory, inputs_) # [#,c][*=q,d]

    matrix = C.times_transpose(inputs_, unpack_memory_expand) / (dim ** 0.5) # [#,c][*=q]
    mem_mask_expand = C.sequence.broadcast_as(mem_mask, inputs_) # [#,c][*=q]
    matrix = C.element_select(mem_mask_expand, matrix, C.constant(-1e+30)) # [#,c][*=q]
    logits = C.reshape(C.softmax(matrix), (-1, 1)) # [#,c][*=q,1]
    # [#,c][*=q, d]
    memory_expand = C.sequence.broadcast_as(C.sequence.unpack(input_mem, 0, no_mask_output=True), input_ph)
    weighted_att = C.reshape(C.reduce_sum(logits * memory_expand, axis=0), (-1,)) # [#,c][d]

    return C.as_block(C.combine(weighted_att, logits),
                      [(input_ph, inputs), (input_mem, memory)],
                      'dot attention',
                      'dot attention')
def gru_cell(shape, init=glorot_uniform(), name=''): # (x, (h,c))
    """ GRU cell function
    """
    shape = _as_tuple(shape)

    if len(shape) != 1:
        raise ValueError("gru_cell: shape must be vectors (rank-1 tensors)")

    # determine stacking dimensions
    cell_shape_stacked = shape * 2  # patched dims with stack_axis duplicated 2 times

    # parameters
    Wz = Parameter(cell_shape_stacked, init=init, name='Wz')
    Wr = Parameter(cell_shape_stacked, init=init, name='Wr')
    Wh = Parameter(cell_shape_stacked, init=init, name='Wh')
    Uz = Parameter(_INFERRED + shape, init=init, name='Uz')
    Ur = Parameter(_INFERRED + shape, init=init, name='Ur')
    Uh = Parameter(_INFERRED + shape, init=init, name='Uh')

    def create_s_placeholder():
        # we pass the known dimensions here, which makes dimension inference easier
        return Placeholder(shape=shape, name='S') # (h, c)

    # parameters to model function
    x = Placeholder(name='gru_block_arg')
    prev_status = create_s_placeholder()

    # formula of model function
    Sn_1 = prev_status

    z = sigmoid(times(x, Uz, name='x*Uz') + times(Sn_1, Wz, name='Sprev*Wz'), name='z')
    r = sigmoid(times(x, Ur, name='x*Ur') + times(Sn_1, Wr, name='Sprev*Wr'), name='r')
    h = tanh(times(x, Uh, name='x*Uh') + times(element_times(Sn_1, r, name='Sprev*r'), Wh), name='h')
    s = plus(element_times((1 - z), h, name='(1-z)*h'), element_times(z, Sn_1, name='z*SPrev'), name=name)
    apply_x_s = combine([s])
    apply_x_s.create_placeholder = create_s_placeholder
    return apply_x_s
def create_binary_convolution_model():
    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3, 3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1, 1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = C.input_variable((input_dim,))

    x_placeholder = C.placeholder()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(C.combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op')

    labels = C.input_variable((proj_dim,))
    lr_schedule = C.learning_rate_schedule(0.003, C.UnitType.sample)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = C.Trainer(combined_model.outputs[0], (ce, pe), C.sgd(ce.parameters, lr=lr_schedule))
def resnet_model(name, scaled_input):
    '''
    Input: pretrained-model name, scaled_input
    Function:
        - We are using Transfer Learning here, since the iNaturalist image dataset is similar to Imagenet data.
        - Load Resnet34 as the base model.
        - Finetune Resnet34 by removing the last layer and adding custom layers.
        - Custom layers:
            - Dense
            - Dropout
            - BatchNorm
    Return: Model
    '''
    print('Loading Resnet model from {}.'.format(name))
    base_model = C.load_model(os.path.join(MODELDIR, name))

    features_placeholder = C.placeholder(shape=(3, 224, 224), name='features')
    features = C.input_variable(shape=(3, 224, 224), name='features')

    feature_node = C.logging.find_by_name(base_model, 'features')
    last_node = C.logging.find_by_name(base_model, 'z.x')

    cloned_layers = C.combine([last_node.owner]).clone(C.CloneMethod.freeze, {feature_node: features_placeholder})
    retained_layers = C.as_block(composite=cloned_layers,
                                 block_arguments_map=[(features_placeholder, features)],
                                 block_op_name='retainedlayers',
                                 block_instance_name='retainedlayers')

    z = retained_layers(scaled_input)
    z = C.layers.GlobalAveragePooling()(z)
    z = C.layers.Dropout(dropout_rate=0.25, name='d1')(z)
    z = C.layers.Dense(10000, activation=C.ops.relu, name='fc1')(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Dropout(dropout_rate=0.4, name='d2')(z)
    z = C.layers.Dense(10000, activation=C.ops.relu, name='fc2')(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = C.layers.Dropout(dropout_rate=0.5, name='d3')(z)
    z = C.layers.Dense(num_classes, activation=None, name='prediction')(z)
    return z
def create_train_model(s2smodel, embed_layer):
    '''
    return:
        @input map
        @softmax
        @loss
    '''
    q = C.Axis.new_unique_dynamic_axis('q')
    a = C.Axis.new_unique_dynamic_axis('a')
    b = C.Axis.default_batch_axis()

    qwk = C.sequence.input_variable(myConfig['wg_dim'], sequence_axis=q, is_sparse=False, name='qwk')
    qwn = C.sequence.input_variable(myConfig['wn_dim'], sequence_axis=q, is_sparse=False, name='qwn')
    awk = C.sequence.input_variable(myConfig['wg_dim'], sequence_axis=a, is_sparse=False, name='awk')
    awn = C.sequence.input_variable(myConfig['wn_dim'], sequence_axis=a, is_sparse=False, name='awn')

    input_ph = {'qwk': qwk, 'qwn': qwn, 'awk': awk, 'awn': awn}

    a_processed = embed_layer(awk, awn)
    q_processed = embed_layer(qwk, qwn)
    a_onehot = C.splice(awk, awn)
    print("a_onehot shape:{}".format(a_onehot.output))

    # the query generates the answer
    logits = s2smodel(a_processed, q_processed)
    logits = C.sequence.slice(logits, 0, -1)
    print('logits shape:{}'.format(logits.output))

    labels = C.sequence.slice(a_onehot, 1, 0) # <s> a b c </s> -> a b c </s>
    print('labels shape:{}'.format(labels.output))
    logits = C.reconcile_dynamic_axes(logits, labels)
    loss = C.cross_entropy_with_softmax(logits, labels)
    errs = C.classification_error(logits, labels)
    return input_ph, logits, C.combine(loss, errs)
def decode_model(use_gpu=True, gpu_id=0):
    # use GPU or CPU according to parameters
    try_set_default_device(gpu(gpu_id) if use_gpu else cpu())

    model_dnn = load_model("./model/speech_enhancement.model")
    features_file = "./test_normed.scp"
    feature_dim = 257
    test_reader = MinibatchSource(HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(shape=feature_dim, context=(3, 3), scp=features_file))),
        randomize=False, frame_mode=False)
    eval_input_map = {input: test_reader.streams.amazing_features}

    f = open(features_file)
    line = f.readline()
    while line:
        temp_input_path = line.split(']')[0]
        mb_size = temp_input_path.split(',')[-1]
        mb_size = int(mb_size) + 1
        noisy_fea = test_reader.next_minibatch(mb_size, input_map=eval_input_map)
        real_noisy_fea = noisy_fea[input].data

        node_in_graph = model_dnn.find_by_name('irm')
        output_nodes = combine([node_in_graph.owner])

        out_noisy_fea = output_nodes.eval(real_noisy_fea)
        # out_noisy_fea = as_composite(model_dnn.output1[0].owner).eval(real_noisy_fea)

        out_SE_noisy_fea = np.concatenate((out_noisy_fea), axis=0)

        out_file_path = line.split('=')[0]
        out_file_name = os.path.join('./enhanced_norm_fea_mat', out_file_path)
        out_file_fullpath = os.path.split(out_file_name)[0]
        # print (out_file_fullpath)
        if not os.path.exists(out_file_fullpath):
            os.makedirs(out_file_fullpath)
        sio.savemat(out_file_name, {'SE': out_SE_noisy_fea})
        line = f.readline()

    f.close()
def __init__(self, model_file=None, im_mean=None, model_output_layer=1):
    # model specific parameters
    # 0: Softmax, 1: Unnormalised output layer
    assert model_output_layer in (0, 1), "model output layer must be 0 or 1"

    self.im_mean = im_mean
    #self.model_name='cnn_model.dnn'
    #model_file=os.path.join(self.param.model_dir,self.model_name)
    print('...loading classification model')
    # ToDo: do checks for image size and num_channel
    mod = load_model(model_file)
    nodes = mod.find_all_with_name('')
    self.pred = combine([nodes[model_output_layer]])
    self.im_height = mod.arguments[0].shape[1]
    self.im_width = mod.arguments[0].shape[2]
    self.im_channels = mod.arguments[0].shape[0]
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    print("creating eval model")
    predictor = clone_model(model, [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = predictor(image_input, roi_proposals)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg.BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg.BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, bbox_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE))

    return eval_model
def test_combine_duplicated_inputs():
    input_dim = 1
    proj_dim = 2
    x = C.input_variable((input_dim,), name='x')
    b = C.parameter((proj_dim), name='b')
    w = C.parameter((input_dim, proj_dim), name='w')
    func_name = 't_plus_b'
    t = C.times(x, w)
    t_plus_b = C.plus(t, b, name=func_name)

    duplicated_t_plus_b = C.combine([t_plus_b, t_plus_b])

    def compare_var_names(vars, names):
        num_vars = len(vars)
        for i in range(num_vars):
            if (vars[i].name != names[i]):
                return False
        return True

    assert compare_var_names(duplicated_t_plus_b.outputs, [func_name, func_name])
def output_layer(self, attention_context, modeling_context):
    att_context = C.placeholder(shape=(8*self.hidden_dim,))
    mod_context = C.placeholder(shape=(2*self.hidden_dim,))

    #output layer
    start_logits = C.layers.Dense(1, name='out_start')(C.dropout(C.splice(mod_context, att_context), self.dropout))
    if self.two_step:
        start_hardmax = seq_hardmax(start_logits)
        att_mod_ctx = C.sequence.last(C.sequence.gather(mod_context, start_hardmax))
    else:
        start_prob = C.softmax(start_logits)
        att_mod_ctx = C.sequence.reduce_sum(mod_context * start_prob)
    att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx, att_context)
    end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded, mod_context * att_mod_ctx_expanded)
    m2 = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='output_rnn')(end_input)
    end_logits = C.layers.Dense(1, name='out_end')(C.dropout(C.splice(m2, att_context), self.dropout))

    return C.as_block(
        C.combine([start_logits, end_logits]),
        [(att_context, attention_context), (mod_context, modeling_context)],
        'output_layer',
        'output_layer')
def create_binary_convolution_model():
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    scaled_input = C.element_times(C.constant(0.00390625), feature_var)
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((3, 3), 128, channels=32, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((3, 3), 128, channels=128, pad=True)(z)
    z = C.layers.MaxPooling((3, 3), strides=(2, 2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution((1, 1), num_classes, channels=128, pad=True)(z)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def build_model(self):
    phmap = self.get_inputs()
    df = phmap['df']
    qf = phmap['qf']
    ab = phmap['ab']
    ae = phmap['ae']

    #input layer
    cc = C.reshape(phmap['cc'], (1, -1))
    qc = C.reshape(phmap['qc'], (1, -1))
    c_processed, q_processed = self.input_layer(phmap['cgw'], phmap['cnw'], cc,
                                                phmap['qgw'], phmap['qnw'], qc).outputs
    c_processed = C.splice(c_processed, df)
    q_processed = C.splice(q_processed, qf)

    # attention layer output:[#,c][8*hidden_dim]
    att_context, wei1 = self.attention_layer(c_processed, q_processed,
                                             dimc=2*self.hidden_dim + 3,
                                             dimq=2*self.hidden_dim + 1,
                                             common_dim=2*self.hidden_dim).outputs
    a = att_context[:4*self.hidden_dim]
    b = att_context[4*self.hidden_dim:]
    self_context = self.multiHead(a, a, self.hidden_dim // 2)

    # modeling layer
    mod_inp = C.splice(self_context, b)
    mod_context = self.modeling_layer(mod_inp)
    mod_context = C.splice(mod_context, df)

    # output layer
    start_logits, end_logits = self.output_layer(att_context, mod_context).outputs

    # loss
    start_loss = seq_loss(start_logits, ab)
    end_loss = seq_loss(end_logits, ae)
    new_loss = all_spans_loss(start_logits, ab, end_logits, ae)
    self._model = C.combine([start_logits, end_logits])
    self._loss = new_loss
    return self._model, self._loss, self._input_phs
def input_layer(self, c1w, c2w):
    c1w_ph = C.placeholder()
    c2w_ph = C.placeholder()

    input_words = C.placeholder(shape=(self.word_dim))
    embedded = self.embed()(input_words)
    processed = OptimizedRnnStack(self.hidden_dim, num_layers=1, bidirectional=True,
                                  use_cudnn=True, name='input_rnn')(embedded)

    c1_processed = processed.clone(C.CloneMethod.share, {input_words: c1w_ph})
    c2_processed = processed.clone(C.CloneMethod.share, {input_words: c2w_ph})

    return C.as_block(C.combine([c1_processed, c2_processed]),
                      [(c1w_ph, c1w), (c2w_ph, c2w)],
                      'input_layer',
                      'input_layer')
def cntk_prediction(pathToImage):
    """
    Function has the following steps:
        - open the image
        - alter the image (change color mode, resize)
        - convert image to array, subtract the mean, roll axis
        - load model
        - pass the image to the model for classification
        - return a result of classification or an error

    Args:
        pathToImage: server path of image

    Returns:
        top_class: the result of classification (1 - target class, 0 - non-target class, -1 - error)
    """
    top_class = -1
    size = 224, 224
    try:
        im = Image.open(pathToImage)
        im = remove_transparency(im)
        im = LPmode2RGB(im)  # black and white images
        im = im.resize(size)  # PIL.Image.NEAREST resampling
        rgb_image = np.asarray(im, dtype=np.float32) - 128
        bgr_image = rgb_image[..., [2, 1, 0]]
        pic = np.ascontiguousarray(np.rollaxis(bgr_image, 2))
        z = load_model(MODEL)
        z_out = ct.combine([z.outputs[3].owner])
        y = ct.ops.softmax(z_out)
        predictions = np.squeeze(y.eval({y.arguments[0]: [pic]}))
        top_class = np.argmax(predictions)
    except Exception:
        # Nothing will be done in the case of an exception. Function will return the -1 value.
        pass
    return top_class
def load_model(self, model_filename):
    self.model_filename = model_filename
    cntk_model = cntk.load_model(model_filename)

    # First try and find output by name
    model_output = cntk_model.find_by_name('ScaledLogLikelihood')
    # Fall back to first defined output
    if model_output is None:
        model_output = cntk_model.outputs[0]

    # Create an object restricted to the desired output.
    cntk_model = cntk.combine(model_output)

    # Optimized RNN models won't run on CPU without conversion.
    if 0 == cntk.use_default_device().type():
        cntk_model = cntk.misc.convert_optimized_rnnstack(cntk_model)

    self.model = cntk_model
    return self
def input_layer(self, cgw, cc, qgw, qc, qnw, cnw):
    cgw_ph = C.placeholder()
    cnw_ph = C.placeholder()
    cc_ph  = C.placeholder()
    qgw_ph = C.placeholder()
    qnw_ph = C.placeholder()
    qc_ph  = C.placeholder()

    input_chars = C.placeholder(shape=(1, self.word_size, self.c_dim))
    input_glove_words = C.placeholder(shape=(self.wg_dim,))
    input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

    embedded = C.splice(
        C.reshape(self.charcnn(input_chars), self.convs),
        self.embed()(input_glove_words, input_nonglove_words), name='splice_embed')
    highway = HighwayNetwork(dim=self.elmo_dim + self.hidden_dim + self.convs,
                             highway_layers=self.highway_layers)(embedded)
    highway_drop = C.layers.Dropout(self.dropout)(highway)
    processed = OptimizedRnnStack(self.hidden_dim, num_layers=1, bidirectional=True,
                                  use_cudnn=self.use_cudnn, name='input_rnn')(highway_drop)

    qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
    cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)

    q_processed = processed.clone(C.CloneMethod.share, {input_chars: qce, input_glove_words: qgw_ph, input_nonglove_words: qnw_ph})
    c_processed = processed.clone(C.CloneMethod.share, {input_chars: cce, input_glove_words: cgw_ph, input_nonglove_words: cnw_ph})

    return C.as_block(
        C.combine([c_processed, q_processed]),
        [(cgw_ph, cgw), (cc_ph, cc), (qgw_ph, qgw), (qc_ph, qc), (qnw_ph, qnw), (cnw_ph, cnw)],
        'input_layer',
        'input_layer')
def _build_network(self, pretrained_policy):
    self.image_frame = C.input_variable((1,) + self.observation_space_shape)
    self.next_image_frame = C.input_variable((1,) + self.observation_space_shape)
    self.reward = C.input_variable((1,))

    if pretrained_policy is None:
        h = C.layers.Convolution2D(filter_shape=(7, 7), num_filters=32, strides=(4, 4),
                                   pad=True, name='conv_1', activation=C.relu)
        h = C.layers.Convolution2D(filter_shape=(5, 5), num_filters=64, strides=(2, 2),
                                   pad=True, name='conv_2', activation=C.relu)(h)
        h = C.layers.Convolution2D(filter_shape=(3, 3), num_filters=128, strides=(1, 1),
                                   pad=True, name='conv_3', activation=C.relu)(h)
        h = C.layers.Dense(64, activation=C.relu, name='dense_1')(h)
        v = C.layers.Dense(1, name='dense_2')(h)

        self.value = v(self.image_frame)
        self.next_value = v(self.next_image_frame)
        self.output = C.combine([self.value, self.next_value])
    else:
        self.output = C.Function.load(pretrained_policy)(self.image_frame, self.next_image_frame)
        [self.value, self.next_value] = self.output[self.value.output], self.output[self.next_value.output]

    target = DISCOUNT_FACTOR * self.next_value + self.reward
    self.loss = C.squared_error(target, self.value)
def score_models_fast(distance_measure, unk_ivecs, spk_ivecs, siam_output='hl2', calc_softmax=False):
    print('Score models')
    #n_spks = spk_ivecs.shape[0]
    node_in_graph = distance_measure.find_all_with_name(siam_output)
    #prov_f_dim = node_in_graph[0].shape[0]
    prov_output = C.combine(node_in_graph[0])
    #prov_spk = np.zeros(shape=(n_spks,prov_f_dim),dtype=np.float32)

    print('Transform blacklist speaker i-vectors')
    prov_spk = prov_output.eval(spk_ivecs)
    print('Transform dev. speaker i-vectors')
    prov_unk = prov_output.eval(unk_ivecs)

    print('Normalize transformed vectors')
    prov_spk = length_norm(prov_spk)
    prov_unk = length_norm(prov_unk)

    print('Calculate scores')
    scores = np.dot(prov_spk, prov_unk.transpose())
    if calc_softmax:
        scores = 1 / (1 + np.exp(-2 * scores))
    return scores
def load_model(model_filename: str):
    """A helper function to load the acoustic model from disc.

    Args:
        model_filename (str): The file path to the acoustic model.
    """
    cntk_model = cntk.load_model(model_filename)

    # First try and find output by name
    model_output = cntk_model.find_by_name('ScaledLogLikelihood')
    # Fall back to first defined output
    if model_output is None:
        model_output = cntk_model.outputs[0]

    # Create an object restricted to the desired output.
    cntk_model = cntk.combine(model_output)

    # Optimized RNN models won't run on CPU without conversion.
    if 0 == cntk.use_default_device().type():
        cntk_model = cntk.misc.convert_optimized_rnnstack(cntk_model)

    return cntk_model
def combine(operands, name=''):
    '''
    Create a new Function instance which just combines the outputs of the specified list of
    'operands' Functions such that the 'Outputs' of the new 'Function' are union of the
    'Outputs' of each of the specified 'operands' Functions. E.g., when creating a classification
    model, typically the CrossEntropy loss Function and the ClassificationError Function comprise
    the two roots of the computation graph which can be "Combine"d to create a single Function
    with 2 outputs; viz. CrossEntropy loss and ClassificationError output.

    Args:
        operands (list): list of functions or their variables to combine
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    from cntk import combine
    from cntk import Variable
    converted_operands = list()
    for o in operands:
        if isinstance(o, Variable):
            converted_operands.append(o.owner)
        else:
            converted_operands.append(o)

    return combine(converted_operands, name)
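# --- Usage sketch for the wrapper above (not from the original source; the tiny model and
# dimensions are assumptions). Loss and error are bundled so a single forward pass yields both;
# either Functions or their output Variables may be passed, Variables are mapped to their owners.
import cntk as C

x = C.input_variable(10)
y = C.input_variable(3)
z = C.layers.Dense(3)(x)

ce = C.cross_entropy_with_softmax(z, y)
pe = C.classification_error(z, y)

criterion = combine([ce, pe.output], name='criterion')
assert len(criterion.outputs) == 2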
def create_eval_model(model, image_input, dims_input, rpn_model=None):
    print("creating eval model")
    conv_layers = clone_model(model, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
    conv_out = conv_layers(image_input)

    model_with_rpn = model if rpn_model is None else rpn_model
    rpn = clone_model(model_with_rpn, [last_conv_node_name, "dims_input"], ["rpn_rois"], CloneMethod.freeze)
    rpn_rois = rpn(conv_out, dims_input)

    roi_fc_layers = clone_model(model, [last_conv_node_name, "rpn_target_rois"], ["cls_score", "bbox_regr"], CloneMethod.freeze)
    pred_net = roi_fc_layers(conv_out, rpn_rois)
    cls_score = pred_net.outputs[0]
    bbox_regr = pred_net.outputs[1]

    if cfg["TRAIN"].BBOX_NORMALIZE_TARGETS and cfg["TRAIN"].BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        num_boxes = int(bbox_regr.shape[1] / 4)
        bbox_normalize_means = np.array(cfg["TRAIN"].BBOX_NORMALIZE_MEANS * num_boxes)
        bbox_normalize_stds = np.array(cfg["TRAIN"].BBOX_NORMALIZE_STDS * num_boxes)
        bbox_regr = plus(element_times(bbox_regr, bbox_normalize_stds), bbox_normalize_means, name='bbox_regr')

    cls_pred = softmax(cls_score, axis=1, name='cls_pred')
    eval_model = combine([cls_pred, rpn_rois, bbox_regr])
    return eval_model