def load(self, head):
  """
  :type head: h5py.File
  """
  try:
    grp = head[self.name]
  except Exception:
    print("warning: unable to load parameters for layer", self.name, file=log.v3)
    return
  grp_class = as_str(grp.attrs['class'])
  if grp_class == "<unknown_softmax>":
    grp_class = "softmax"  # bug in some CRNN version. can be ignored.
  if grp_class != self.layer_class:
    from NetworkLayer import get_layer_class
    if get_layer_class(grp_class, raise_exception=False) is not get_layer_class(self.layer_class):
      print("warning: invalid layer class (expected " + self.layer_class +
            " got " + grp.attrs['class'] + ")", file=log.v3)
  for p in self.params:
    if p not in grp:
      print("unable to load parameter %s in %s" % (p, self.name), file=log.v4)
  for p in grp:
    if p in self.params:
      if self.params[p].get_value(borrow=True, return_internal_type=True).shape == grp[p].shape:
        array = grp[p][...]
        assert not (numpy.isinf(array).any() or numpy.isnan(array).any())
        self.params[p].set_value(array)
      else:
        print("warning: invalid layer parameter shape for parameter " + p +
              " of layer " + self.name +
              " (expected " + str(self.params[p].get_value(borrow=True, return_internal_type=True).shape) +
              " got " + str(grp[p].shape) + ")", file=log.v2)
    else:
      print("unable to match parameter %s in %s" % (p, self.name), file=log.v4)
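# A minimal sketch (not part of the original code) of the HDF5 layout that
# load() expects: one group per layer, a 'class' attribute on that group, and
# one dataset per parameter. The file, layer and parameter names below are
# made up for illustration.
import h5py
import numpy

with h5py.File("model.example.h5", "w") as f:
  grp = f.create_group("output")  # group name == layer name
  grp.attrs['class'] = "softmax"  # compared against self.layer_class
  grp.create_dataset("W_in_hidden_output", data=numpy.zeros((512, 4000), dtype="float32"))
  grp.create_dataset("b", data=numpy.zeros((4000,), dtype="float32"))

with h5py.File("model.example.h5", "r") as f:
  grp = f["output"]
  W = grp["W_in_hidden_output"][...]  # reads the full array, as load() does
  assert W.shape == (512, 4000)       # load() checks this against the shared variable's shape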
@classmethod
def from_hdf(cls, filename=None, model=None, load_params=True, **kwargs):
  """
  Gets the JSON from the hdf file, initializes the network and loads the network params.
  :param str|None filename: filename of hdf
  :param h5py.File|None model: hdf, if no filename is provided
  :param bool load_params: whether to load the params
  """
  if model is None:
    assert filename
    model = h5py.File(filename, "r")
    close_at_end = True
  else:
    assert not filename
    close_at_end = False
  assert "json" in model.attrs, "Maybe old network model where JSON was not stored. Use version before 2016-10-11."
  json_content_s = as_str(model.attrs['json'])
  assert json_content_s and json_content_s != "{}"
  json_content = json.loads(json_content_s)
  kwargs = kwargs.copy()
  if "n_out" not in kwargs:
    n_in, n_out = cls._n_in_out_from_hdf_model(model)
    n_out['__final'] = True
    kwargs["n_in"] = n_in
    kwargs["n_out"] = n_out
  network = cls.from_json(json_content, **kwargs)
  if load_params:
    network.load_hdf(model)
  if close_at_end:
    model.close()
  return network
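# Hedged usage sketch: the class name LayerNetwork and the filename are
# assumptions; any class providing from_json(), _n_in_out_from_hdf_model()
# and load_hdf() as used above would work the same way.
network = LayerNetwork.from_hdf("net-model/network.042")
# Or with an already-open h5py.File (filename must then be left unset);
# note that the file is not closed for you in this case:
# with h5py.File("net-model/network.042", "r") as model:
#   network = LayerNetwork.from_hdf(model=model, load_params=False)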
def __init__(self, layer_class=None, name="", network=None,
             train_flag=False, eval_flag=False, depth=1, consensus="flat",
             forward_weights_init=None, bias_init=None, weight_clip=0.0, cost=None,
             recurrent_weights_init=None, substitute_param_expr=None):
  """
  :param str layer_class: name of layer type, e.g. "hidden", "recurrent", "lstm" or so. see LayerClasses.
  :param str name: custom layer name, e.g. "hidden_2"
  :param Network.LayerNetwork network: the network which we will be part of
  :param str forward_weights_init: see self.create_forward_weights()
  :param str bias_init: see self.create_bias()
  """
  self.params = {}; """ :type: dict[str,theano.compile.sharedvalue.SharedVariable] """
  self.attrs = {}; """ :type: dict[str,str|float|int|bool|dict] """
  self.device = None
  if layer_class:
    self.layer_class = as_str(layer_class.encode("utf8"))
  self.name = as_str(name.encode("utf8"))
  self.train_flag = train_flag
  self.eval_flag = eval_flag
  self.depth = depth
  if depth != 1:
    self.set_attr('depth', depth)
  if consensus != "flat":
    self.set_attr('consensus', consensus)
  self.network = network
  if forward_weights_init:
    self.set_attr("forward_weights_init", forward_weights_init)
  self.forward_weights_init = forward_weights_init or "random_normal()"
  if recurrent_weights_init:
    self.set_attr("recurrent_weights_init", recurrent_weights_init)
  self.recurrent_weights_init = recurrent_weights_init or "random_uniform()"
  if bias_init:
    self.set_attr("bias_init", bias_init)
  self.bias_init = bias_init or "zeros()"
  if substitute_param_expr:
    self.set_attr("substitute_param_expr", substitute_param_expr)
  self.substitute_param_expr = substitute_param_expr
  if weight_clip:
    self.set_attr('weight_clip', weight_clip)
  if cost:
    self.set_attr('cost', cost)
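# Hypothetical sketch (not the actual implementation): the *_init arguments
# above are spec strings like "random_normal()" or "zeros()", which
# create_forward_weights()/create_bias() later evaluate. This shows one way
# such strings can map to arrays; the helper and parameter names below are
# assumptions for illustration only.
import numpy

def _eval_init_spec(spec, shape, rng=numpy.random):
  """Evaluate an init spec string against a small namespace of initializers."""
  ns = {
    "random_normal": lambda scale=0.1: rng.normal(0.0, scale, size=shape).astype("float32"),
    "random_uniform": lambda scale=0.1: rng.uniform(-scale, scale, size=shape).astype("float32"),
    "zeros": lambda: numpy.zeros(shape, dtype="float32"),
  }
  return eval(spec, {"__builtins__": {}}, ns)

# e.g. _eval_init_spec("random_normal()", (512, 128)).shape == (512, 128)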
def __init__(self, loss, y, dtype=None, copy_input=None, copy_output=None, time_limit=0,
             use_source_index=False,
             compute_priors=False, compute_priors_exp_average=0,
             compute_distortions=False,
             softmax_smoothing=1.0, grad_clip_z=None, grad_discard_out_of_bound_z=None, normalize_length=False,
             exclude_labels=[],
             apply_softmax=True,
             substract_prior_from_output=False,
             input_output_similarity=None,
             input_output_similarity_scale=1,
             **kwargs):
  """
  :param theano.Variable index: index for batches
  :param str loss: e.g. 'ce'
  """
  super(OutputLayer, self).__init__(**kwargs)
  self.set_attr("normalize_length", normalize_length)
  if dtype:
    self.set_attr('dtype', dtype)
  if copy_input:
    self.set_attr("copy_input", copy_input.name)
  if grad_clip_z is not None:
    self.set_attr("grad_clip_z", grad_clip_z)
  if compute_distortions:
    self.set_attr("compute_distortions", compute_distortions)
  if grad_discard_out_of_bound_z is not None:
    self.set_attr("grad_discard_out_of_bound_z", grad_discard_out_of_bound_z)
  if not apply_softmax:
    self.set_attr("apply_softmax", apply_softmax)
  if substract_prior_from_output:
    self.set_attr("substract_prior_from_output", substract_prior_from_output)
  if input_output_similarity:
    self.set_attr("input_output_similarity", input_output_similarity)
    self.set_attr("input_output_similarity_scale", input_output_similarity_scale)
  if use_source_index:
    self.set_attr("use_source_index", use_source_index)
    src_index = self.sources[0].index
    self.index = src_index
  if not copy_input:
    self.z = self.b
    self.W_in = [self.add_param(self.create_forward_weights(source.attrs['n_out'],
                                                            self.attrs['n_out'],
                                                            name="W_in_%s_%s" % (source.name, self.name)))
                 for source in self.sources]
    assert len(self.sources) == len(self.masks) == len(self.W_in)
    assert len(self.sources) > 0
    for source, m, W in zip(self.sources, self.masks, self.W_in):
      source_output = source.output
      # 4D input from TwoD layers -> collapse height dimension
      if source_output.ndim == 4:
        source_output = source_output.sum(axis=0)
      if source.attrs['sparse']:
        if source.output.ndim == 3:
          input = source_output[:, :, 0]  # old sparse format
        else:
          assert source_output.ndim == 2
          input = source.output
        self.z += W[T.cast(input, 'int32')]
      elif m is None:
        self.z += self.dot(source_output, W)
      else:
        self.z += self.dot(self.mass * m * source_output, W)
  else:
    self.z = copy_input.output
  assert self.z.ndim == 3
  if grad_clip_z is not None:
    grad_clip_z = numpy.float32(grad_clip_z)
    self.z = theano.gradient.grad_clip(self.z, -grad_clip_z, grad_clip_z)
  if grad_discard_out_of_bound_z is not None:
    grad_discard_out_of_bound_z = numpy.float32(grad_discard_out_of_bound_z)
    self.z = grad_discard_out_of_bound(self.z, -grad_discard_out_of_bound_z, grad_discard_out_of_bound_z)
  if not copy_output:
    self.y = y
  else:
    self.index = copy_output.index
    self.y = copy_output.y_out
  if y is None:
    self.y_data_flat = None
  elif isinstance(y, theano.Variable):
    self.y_data_flat = time_batch_make_flat(y)
  else:
    assert self.attrs.get("target", "").endswith("[sparse:coo]")
    assert isinstance(self.y, tuple)
    assert len(self.y) == 3
    s0, s1, weight = self.y
    from NativeOp import max_and_argmax_sparse
    n_time = self.z.shape[0]
    n_batch = self.z.shape[1]
    mask = self.network.j[self.attrs.get("target", "").replace("[sparse:coo]", "[sparse:coo:2:0]")]
    out_arg = T.zeros((n_time, n_batch), dtype="float32")
    out_max = T.zeros((n_time, n_batch), dtype="float32") - numpy.float32(1e16)
    out_arg, out_max = max_and_argmax_sparse(s0, s1, weight, mask, out_arg, out_max)
    assert out_arg.ndim == 2
    self.y_data_flat = out_arg.astype("int32")
  self.norm = numpy.float32(1)
  self.target_index = self.index
  if time_limit == 'inf':
    # target_length = self.index.shape[0]
    # mass = T.cast(T.sum(self.index), 'float32')
    # self.index = theano.ifelse.ifelse(T.gt(self.z.shape[0], target_length), self.sources[0].index, self.index)
    # self.norm = mass / T.cast(T.sum(self.index), 'float32')
    num = T.cast(T.sum(self.index), 'float32')
    if self.eval_flag:
      self.index = self.sources[0].index
    else:
      import theano.ifelse
      padx = T.zeros((T.abs_(self.index.shape[0] - self.z.shape[0]), self.index.shape[1], self.z.shape[2]),
                     'float32') + self.z[-1]
      pady = T.zeros((T.abs_(self.index.shape[0] - self.z.shape[0]), self.index.shape[1]), 'int32')  # + y[-1]
      padi = T.ones((T.abs_(self.index.shape[0] - self.z.shape[0]), self.index.shape[1]), 'int8')
      self.z = theano.ifelse.ifelse(T.lt(self.z.shape[0], self.index.shape[0]),
                                    T.concatenate([self.z, padx], axis=0), self.z)
      # self.z = theano.ifelse.ifelse(T.gt(self.z.shape[0], self.index.shape[0]), self.z[:self.index.shape[0]], self.z)
      self.y_data_flat = time_batch_make_flat(theano.ifelse.ifelse(T.gt(self.z.shape[0], self.index.shape[0]),
                                                                   T.concatenate([y, pady], axis=0), y))
      # self.index = theano.ifelse.ifelse(T.gt(self.z.shape[0], self.index.shape[0]), T.concatenate([T.ones((self.z.shape[0] - self.index.shape[0], self.z.shape[1]), 'int8'), self.index], axis=0), self.index)
      self.index = theano.ifelse.ifelse(T.gt(self.z.shape[0], self.index.shape[0]),
                                        T.concatenate([padi, self.index], axis=0), self.index)
    self.norm *= num / T.cast(T.sum(self.index), 'float32')
  elif time_limit > 0:
    end = T.min([self.z.shape[0], T.constant(time_limit, 'int32')])
    num = T.cast(T.sum(self.index), 'float32')
    self.index = T.set_subtensor(self.index[end:], T.zeros_like(self.index[end:]))
    self.norm = num / T.cast(T.sum(self.index), 'float32')
    self.z = T.set_subtensor(self.z[end:], T.zeros_like(self.z[end:]))

  # xs = [s.output for s in self.sources]
  # self.z = AccumulatorOpInstance(*[self.b] + xs + self.W_in)
  # outputs_info = None  # [T.alloc(numpy.cast[theano.config.floatX](0), index.shape[1], self.attrs['n_out'])]
  # self.z, _ = theano.scan(step,
  #                         sequences=[s.output for s in self.sources],
  #                         non_sequences=self.W_in + [self.b])

  self.set_attr('from', ",".join([s.name for s in self.sources]))
  index_flat = self.index.flatten()
  for label in exclude_labels:
    index_flat = T.set_subtensor(index_flat[(T.eq(self.y_data_flat, label) > 0).nonzero()], numpy.int8(0))
  self.i = (index_flat > 0).nonzero()
  self.j = ((numpy.int32(1) - index_flat) > 0).nonzero()
  self.loss = as_str(loss.encode("utf8"))
  self.attrs['loss'] = self.loss
  if compute_priors:
    self.set_attr('compute_priors', compute_priors)
    if compute_priors_exp_average:
      self.set_attr('compute_priors_exp_average', compute_priors_exp_average)
  if softmax_smoothing != 1.0:
    self.attrs['softmax_smoothing'] = softmax_smoothing
    print("Logits before the softmax scaled with factor", softmax_smoothing, file=log.v4)
    self.z *= numpy.float32(softmax_smoothing)
  if self.loss == 'priori':
    self.priori = self.shared(value=numpy.ones((self.attrs['n_out'],), dtype=theano.config.floatX), borrow=True)
  if input_output_similarity:
    # First a self-similarity of input and output,
    # and then add -similarity or distance between those to the constraints,
    # so that the input and output correlate on a frame-by-frame basis.
    # Here some other similarities/distances we could try:
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html
    # https://brenocon.com/blog/2012/03/cosine-similarity-pearson-correlation-and-ols-coefficients/
    from TheanoUtil import self_similarity_cosine
    self_similarity = self_similarity_cosine  # maybe other
    data_layer = self.find_data_layer()
    assert data_layer
    assert data_layer.output.ndim == 3
    n_time = data_layer.output.shape[0]
    n_batch = data_layer.output.shape[1]
    findex = T.cast(self.output_index(), "float32")
    findex_bc = findex.reshape((n_time * n_batch,)).dimshuffle(0, 'x')
    findex_sum = T.sum(findex)
    data = data_layer.output.reshape((n_time * n_batch, data_layer.output.shape[2])) * findex_bc
    assert self.z.ndim == 3
    z = self.z.reshape((n_time * n_batch, self.z.shape[2])) * findex_bc
    data_self_sim = T.flatten(self_similarity(data))
    z_self_sim = T.flatten(self_similarity(z))
    assert data_self_sim.ndim == z_self_sim.ndim == 1
    sim = T.dot(data_self_sim, z_self_sim)  # maybe others make sense
    assert sim.ndim == 0
    # sim is ~ proportional to T * T, so divide by T.
    sim *= numpy.float32(input_output_similarity_scale) / findex_sum
    self.constraints -= sim

  # self.make_output(self.z, collapse=False)
  # Note that self.output is going to be overwritten in our derived classes.
  self.output = self.make_consensus(self.z) if self.depth > 1 else self.z
  self.y_m = None  # flat log(self.p_y_given_x)
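# A minimal numpy sketch (assumptions marked) of the input_output_similarity
# term above: cosine self-similarity over the frames of the input and of the
# output, then the dot product of the two flattened similarity patterns as the
# reward that is subtracted from the constraints. The actual
# TheanoUtil.self_similarity_cosine may differ in detail (e.g. it may return
# only the upper triangle); this is an illustration, not the implementation.
import numpy

def _self_similarity_cosine(x):
  """Pairwise cosine similarities between all row vectors of x, flattened."""
  norms = numpy.maximum(numpy.linalg.norm(x, axis=1, keepdims=True), 1e-8)
  xn = x / norms
  return (xn @ xn.T).flatten()

def _similarity_reward(data, z, scale=1.0):
  # data, z: (n_time * n_batch, dim), already masked by the flat index.
  sim = numpy.dot(_self_similarity_cosine(data), _self_similarity_cosine(z))
  # sim grows ~ T * T, so normalize by the number of valid frames (findex_sum above).
  return scale * sim / float(data.shape[0])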