def get_latents(self, inputs=None, training=None, mask=None, return_prior=False, **kwargs) -> Sequence[Distribution]: z0 = super().get_latents(inputs=inputs, training=training, mask=mask, return_prior=return_prior, **kwargs) posterior, prior = list(as_tuple(z0)), [] if return_prior: posterior = list(as_tuple(z0[0])) prior = list(as_tuple(z0[1])) z0 = z0[0] # new encode called if inputs is not None: self.decode(z0, training=training, mask=mask) for layer in self.hierarchical_latents: posterior.append(layer.posterior) prior.append(layer.prior) if return_prior: return tuple(posterior), tuple(prior) return tuple(posterior)
def __init__(self, input_shape, batchnorm=False, units=1000, n_hidden_layers=5, n_outputs=2, activation=tf.nn.leaky_relu, name="FactorDiscriminator"): # 1: real sample for q(z) (or last unit in case n_outputs > 2) and # 0: fake sample from q(z-) layers = [ keras.layers.InputLayer(input_shape=tf.nest.flatten(input_shape)), keras.layers.Flatten() ] for idx, (units, activation) in enumerate( zip(as_tuple(units, N=n_hidden_layers), as_tuple(activation, N=n_hidden_layers))): sublayers = [ keras.layers.Dense(units, use_bias=not batchnorm, activation='linear') ] if batchnorm: sublayers.append(keras.layers.BatchNormalization()) sublayers.append(keras.layers.Activation(activation)) layers += sublayers layers.append(keras.layers.Dense(int(n_outputs), activation='linear')) super().__init__(layers, name=name) self.input_ndim = len(self.input_shape) - 1
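# The constructor above stacks Dense -> (optional) BatchNorm -> Activation
# blocks before a final linear output layer. A minimal standalone Keras
# sketch of the same pattern (a rough illustration, not the class defaults;
# `make_discriminator` and its arguments are hypothetical names):
import tensorflow as tf
from tensorflow import keras

def make_discriminator(input_shape=(10,), units=(1000, 1000), n_outputs=2,
                       batchnorm=True):
  layers = [keras.layers.InputLayer(input_shape=input_shape),
            keras.layers.Flatten()]
  for u in units:
    # linear Dense, then BatchNorm, then the non-linearity
    layers.append(keras.layers.Dense(u, use_bias=not batchnorm,
                                     activation='linear'))
    if batchnorm:
      layers.append(keras.layers.BatchNormalization())
    layers.append(keras.layers.Activation(tf.nn.leaky_relu))
  layers.append(keras.layers.Dense(n_outputs, activation='linear'))
  return keras.Sequential(layers)

disc = make_discriminator()
print(disc(tf.zeros([4, 10])).shape)  # (4, 2)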
def elbo_components(self, inputs, training=None, mask=None, **kwargs):
  llk, kl = super().elbo_components(inputs, mask=mask, training=training)
  px_z, qz_x = self.last_outputs
  for z, qz in zip(as_tuple(self.latents), as_tuple(qz_x)):
    tc = total_correlation(tf.convert_to_tensor(qz), qz)
    kl[f'tc_{z.name}'] = (self.beta - 1.) * tc
  return llk, kl
def get_all_tensors(scope=None, name=None, full_name=None, device=None): """ Parameters ---------- scope: {str, None} scope name which the Variables have been created name: str name of tensor (without variable scope) full_name: str name of tensor WITH variable scope. device : {str, None} name of the device to which this op has been assigned (e.g. /cpu:0, or /gpu:0) """ ops = get_all_operations(device=device, scope=scope, sort=False) alltensors = [] for o in ops: alltensors += list(o.inputs) + list(o._outputs) for i in o.control_inputs: alltensors += list(i.inputs) + list(i._outputs) alltensors = list(set(alltensors)) # ====== filter out unsupport types ====== # if name is not None: name = as_tuple(name, t=string_types) alltensors = [t for t in alltensors if any((n == t.name.split('/')[-1] or n.split(':')[0] == t.name.split('/')[-1].split(':')[0]) for n in name)] if full_name is not None: full_name = as_tuple(full_name, t=string_types) alltensors = [t for t in alltensors if any((n == t.name or n.split(':')[0] == t.name.split(':')[0]) for n in full_name)] return alltensors
def elbo_components(
    self,
    inputs: Union[Tensor, List[Tensor]],
    training: Optional[bool] = None,
    mask: Optional[Tensor] = None,
    **kwargs,
) -> Tuple[Dict[str, Tensor], Dict[str, Tensor]]:
  """Calculate the distortion (log-likelihood) and rate (KL-divergence)
  for constructing the Evidence Lower Bound (ELBO)"""
  # organize all inputs to list
  pX_Z, qZ_X = self(inputs, training=training, mask=mask, **kwargs)
  ### llk
  llk = {}
  for obs, x, pX in zip(self.observation, as_tuple(inputs), as_tuple(pX_Z)):
    llk[f'llk_{obs.name}'] = pX.log_prob(x)
  ### kl
  kl = {}
  for z, qZ in zip(self.latents, as_tuple(qZ_X)):
    if hasattr(qZ, "KL_divergence"):
      kl[f'kl_{z.name}'] = qZ.KL_divergence(analytic=self.analytic,
                                            reverse=self.reverse,
                                            sample_shape=None,
                                            keepdims=True)
    else:
      kl[f'kl_{z.name}'] = tf.constant(0., dtype=self.dtype)
  return llk, kl
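# The (llk, kl) dictionaries returned above are typically reduced into a
# single ELBO estimate: sum the log-likelihood terms, subtract the KL terms,
# then average over the batch. A minimal sketch with toy per-sample tensors
# (the reduction below is illustrative, not the library's own code):
import tensorflow as tf

llk = {'llk_image': tf.constant([-120.3, -98.7])}   # log p(x|z), shape (batch,)
kl = {'kl_latents': tf.constant([12.1, 9.4])}       # KL(q(z|x) || p(z))

elbo = (tf.reduce_sum(tf.stack(list(llk.values()), axis=0), axis=0) -
        tf.reduce_sum(tf.stack(list(kl.values()), axis=0), axis=0))
loss = -tf.reduce_mean(elbo)  # minimize the negative ELBO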
def elbo_components(self, inputs, training=None, mask=None): ## unsupervised ELBO X, y, mask = prepare_ssl_inputs(inputs, mask=mask, n_unsupervised_inputs=1) if mask is not None: mask = tf.reshape(mask, (-1, )) llk, kl = super(AnnealingVAE, self).elbo_components(X[0], mask=mask, training=training) P, Q = self.last_outputs px_z = P[:-1] py_z = P[-1] Q = as_tuple(Q) # q(z|x) ## supervised loss llk[f"llk_{self.labels.name}"] = _get_llk_y(py_z, y, mask, self.alpha) ## MI objective self.labels = self.labels_q mi_y, mi_z = self._mi_loss(Q, py_z, training=training, mask=mask) self.labels = self.labels_p ## maximizing the MI llk[f'mi_{self.labels.name}'] = mi_y for z, mi in zip(as_tuple(self.latents), mi_z): llk[f'mi_{z.name}'] = mi return llk, kl
def _step(X): ret_llk = [] ret_kl = [] if isinstance(X, dict): Q = self.encode(training=training, **X, **kwargs) else: Q = self.encode(X, training=training, **kwargs) Q = as_tuple(Q) z = [i.sample(n_mcmc) for i in Q] z_reshape = [tf.reshape(i, (-1, i.shape[-1])) for i in z] P = self.decode(z_reshape[0] if len(Q) == 1 else z_reshape, training=training) P = as_tuple(P) # calculate the KL for qz, z in zip(Q, z): if hasattr(qz, 'KL_divergence'): pz = qz.KL_divergence.prior name = qz.name.split('_')[0] llk_q = qz.log_prob(z) llk_p = pz.log_prob(z) ret_kl.append((name, (llk_q, llk_p))) # calculate the LLK if isinstance(X, dict): X = X['inputs'] X = as_tuple(X) for px, x in zip(P, X): x = tf.tile(x, [n_mcmc] + [1 for i in range(len(x.shape) - 1)]) name = px.name.split('_')[0] llk_x = tf.reshape(px.log_prob(x), (n_mcmc, -1)) ret_llk.append((name, llk_x)) return ret_llk, ret_kl
def elbo_components(self, inputs, training=None, mask=None):
  ## unsupervised ELBO
  inputs = as_tuple(inputs)
  # === 1. unsupervised
  X_uns = inputs[0]
  llk_uns, kl_uns = super().elbo_components(X_uns, mask=mask, training=training)
  P, Q = self.last_outputs
  py_z = P[-1]
  Q = as_tuple(Q)  # q(z|x)
  # MI objective
  mi_y, mi_z = self._mi_loss(Q, py_z, training=training)
  llk_uns[f'mi_{self.labels.name}'] = mi_y
  for z, mi in zip(as_tuple(self.latents), mi_z):
    llk_uns[f'mi_{z.name}'] = mi
  # === 2. supervised (if labelled data is provided)
  if len(inputs) > 1:
    X_sup, y_sup = inputs[1:]
    is_empty = tf.size(X_sup) == 0
    llk_sup, kl_sup = super().elbo_components(X_sup, mask=mask, training=training)
    P, Q = self.last_outputs
    # ignore if data is empty
    llk_sup = self.ignore_empty(is_empty, llk_sup)
    kl_sup = self.ignore_empty(is_empty, kl_sup)
    llk_sup[f'llk_{self.labels.name}'] = tf.cond(
        is_empty,
        lambda: 0.,
        lambda: _get_llk_y(P[-1], y_sup, self.alpha))
  else:
    llk_sup, kl_sup = {}, {}
  # === 3. merge objectives
  return self.merge_objectives(llk_uns, kl_uns, llk_sup, kl_sup)
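# When the labelled mini-batch may be empty, wrapping the supervised term in
# tf.cond (as above with `ignore_empty` and `_get_llk_y`) keeps the loss
# finite. A standalone illustration of the same guard; the quadratic loss is
# a stand-in, not the actual supervised objective:
import tensorflow as tf

def supervised_term(x_sup, y_sup):
  is_empty = tf.size(x_sup) == 0
  return tf.cond(is_empty,
                 lambda: tf.constant(0., dtype=tf.float32),
                 lambda: tf.reduce_mean(tf.square(x_sup - y_sup)))

print(supervised_term(tf.zeros([0, 4]), tf.zeros([0, 4])).numpy())  # 0.0
print(supervised_term(tf.ones([2, 4]), tf.zeros([2, 4])).numpy())   # 1.0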
def callback(): losses = get_current_trainer().valid_loss if losses[-1] <= np.min(losses): vae.save_weights(overwrite=True) # posterior vp = VariationalPosterior(model=vae, inputs=x_samples, groundtruth=GroundTruth(y_samples), n_samples=1000) px = as_tuple(vp.outputs) qz = as_tuple(vp.latents) # store the histogram mean = tf.reduce_mean(qz[0].mean(), axis=0) std = tf.reduce_mean(qz[0].stddev(), axis=0) # show traverse image images = np.concatenate([ vp.traverse(i, min_val=-3, max_val=3, num=21, mode='linear').outputs[0].mean().numpy() for i in np.argsort(std)[:20] ]) image_traverse = to_image(images, grids=(20, int(images.shape[0] / 20))) # show sampled image px = as_tuple(vae.decode(z_samples, training=False)) image_sampled = to_image(px[0].mean().numpy(), grids=(4, 4)) return dict(mean=mean, std=std, traverse=image_traverse, sampled=image_sampled)
def supervised_loss(self,
                    labels: Union[tf.Tensor, List[tf.Tensor]],
                    qz_x: Distribution,
                    mean: bool = False,
                    mask: Optional[tf.Tensor] = None,
                    training: Optional[bool] = None) -> tf.Tensor:
  labels = as_tuple(labels)
  z = self._to_samples(qz_x, mean=mean, stop_grad=True)
  distributions = as_tuple(self(z, training=training))
  ## applying the mask (1-labelled, 0-unlabelled)
  if mask is not None:
    mask = tf.reshape(mask, (-1,))
    # labels = [tf.boolean_mask(y, mask, axis=0) for y in labels]
    # z_logits = tf.boolean_mask(z_logits, mask, axis=0)
  ## calculate the loss
  loss = 0.
  for dist, y_true in zip(distributions, labels):
    llk = dist.log_prob(y_true)
    # apply the mask carefully here:
    # if the batch contains no labelled data, just return 0
    if mask is not None:
      llk = tf.cond(tf.reduce_all(tf.logical_not(mask)),
                    lambda: 0.,
                    lambda: tf.boolean_mask(llk, mask, axis=0))
    # negative log-likelihood here
    loss += -llk
  # if the loss is (near) zero, stop the gradient, otherwise NaN gradients occur
  loss = tf.reduce_mean(loss)
  loss = tf.cond(
      tf.abs(loss) < 1e-8, lambda: tf.stop_gradient(loss), lambda: loss)
  return loss
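# The masking trick above selects only the labelled samples and falls back to
# a zero loss when nothing in the batch is labelled. A small standalone check
# of the same tf.boolean_mask / tf.cond pattern with toy numbers:
import tensorflow as tf

llk = tf.constant([-1.2, -0.4, -3.0, -0.7])       # per-sample log p(y|z)
mask = tf.constant([True, False, True, False])    # 1-labelled, 0-unlabelled

loss = tf.cond(tf.reduce_all(tf.logical_not(mask)),
               lambda: 0.,
               lambda: -tf.reduce_mean(tf.boolean_mask(llk, mask, axis=0)))
print(loss.numpy())  # mean of 1.2 and 3.0 = 2.1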
def __init__(self, n_new_features, n_time_context, time_pool='max', backward=False, W_init=init_ops.glorot_uniform_initializer(seed=randint()), b_init=init_ops.constant_initializer(0), activation=K.linear, **kwargs): super(TimeDelayedDense, self).__init__(**kwargs) if n_new_features is None: self.n_new_features = [] else: self.n_new_features = as_tuple(n_new_features, t=int) self.n_time_context = int(n_time_context) self.n_layers = len(self.n_new_features) # ====== initialization ====== # self.W_init = W_init self.b_init = b_init # ====== activation ====== # if activation is None: activation = K.linear if not isinstance(activation, (tuple, list)): activation = (activation,) activation = [K.linear if i is None else i for i in activation] self.activation = as_tuple(activation, N=self.n_layers) # ====== time axis manipulation ====== # time_pool = str(time_pool).lower() assert time_pool in _allow_time_pool, \ "Only support: %s; but given: '%s'" % (str(_allow_time_pool), str(time_pool)) self.time_pool = time_pool self.backward = bool(backward)
def copy(self, indices: Optional[Union[slice, List[int]]] = None, latents: Optional[Distribution] = None, outputs: Optional[List[Distribution]] = None, suffix: str = 'copy') -> VariationalPosterior: """Return the deepcopy""" obj = super().copy(suffix=suffix) # helper for slicing fslice = lambda x: x[indices] if indices is not None else x # copy the factors obj._groundtruth = fslice(self._groundtruth.copy()) # copy the inputs obj._inputs = [np.array(fslice(i)) for i in self._inputs] obj._indices = np.array(fslice(self._indices)) ## inference for the latents and outputs if indices is not None: if latents is None: inputs = obj.inputs latents = self.model.encode( inputs[0] if len(inputs) == 1 else inputs, training=False) if outputs is None: outputs = self.model.decode(latents, training=False) latents = as_tuple(latents) obj._latents = CombinedDistribution(latents, name='Latents') \ if len(latents) > 1 else latents[0] obj._outputs = list(as_tuple(outputs)) ## just copy paste else: obj._latents = self._latents.copy() obj._outputs = [o.copy() for o in self._outputs] return obj
def tile(x, reps, axis=None):
  r"""Construct an array by repeating `x` the number of times given by `reps`.

  If `x` has shape (s1, s2, s3) and axis=(1, -1), the output will have shape
  (s1, s2 * reps[0], s3 * reps[1]).

  Parameters
  ----------
  reps : {int, list of int}
    the number of repetitions for each of the given axes
  axis : {int, list of int}
    all axes for repeating
  """
  ndim = x.ndim
  if axis is not None:
    if not isinstance(axis, (tuple, list)):
      axis = (axis,)
    axis = _normalize_axis(axis, ndim)
    reps = as_tuple(reps, N=len(axis), t=int)
    multiples = [reps[axis.index(i)] if i in axis else 1 for i in range(ndim)]
  else:
    reps = as_tuple(reps, t=int)
    multiples = [reps[i] if i < len(reps) else 1 for i in range(ndim)]
  if tf.is_tensor(x):
    return tf.tile(x, multiples=multiples)
  elif torch.is_tensor(x):
    return x.repeat(multiples)
  return np.tile(x, reps=multiples)
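# A quick NumPy-only check of the axis handling above: for axis=(1, -1) the
# `multiples` list is built per-dimension (the `as_tuple`/`_normalize_axis`
# helpers are bypassed here; axis -1 is normalized to 2 by hand):
import numpy as np

x = np.zeros((4, 5, 6))            # shape (s1, s2, s3)
axis, reps = (1, 2), (2, 3)        # axis=(1, -1) with ndim=3
multiples = [reps[axis.index(i)] if i in axis else 1 for i in range(x.ndim)]
print(multiples)                           # [1, 2, 3]
print(np.tile(x, reps=multiples).shape)    # (4, 10, 18)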
def get(self, scope=None, name=None, full_name=None, roles=None, match_all=False, exact=False, beginning_scope=False): """ Return all variables and tensor with given roles Parameters ---------- scope : {None, string} name of variable scope, any of the scope that match given name will be selected name : {None, string} the name of tensor without the output indexing ":0" and the scope full_name : {None, string} the full name includes both scope and tensor name without the output indexing ":0" roles : {None, odin.backend.role} specific roles of the tensor match_all : bool (default: False) If ``True``, checks if the variable has all given roles. If ``False``, any of the roles is sufficient. exact : bool (default: False) If ``True``, use ``==`` for comparison to get exactly same roles. If ``False``, use `issubclass` for comparison, hence, also match the descendant roles. beginning_scope : bool (default: True) if True, the provide scope must be the beginning scope, otherwise, it could be in the middle of multiple scopes """ alltensors = self.tensors + self.variables # ====== by role ====== # if roles is not None: alltensors = [t for t in alltensors if has_roles(t, roles=roles, match_all=match_all, exact=exact)] # ====== from general to detail ====== # if scope is not None: scope = str(scope) if len(scope) == 0: alltensors = [t for t in alltensors if '/' not in t.name] else: scope_name_pattern = _TF_SCOPE_PATTERN(scope, beginning_scope) alltensors = [t for t in alltensors if len(scope_name_pattern.findall(t.name))] # ====== filter by name ====== # if name is not None: name = as_tuple(name, t=string_types) alltensors = [t for t in alltensors if any((n == t.name.split('/')[-1] or n.split(':')[0] == t.name.split('/')[-1].split(':')[0]) for n in name)] # ====== full name ====== # if full_name is not None: full_name = as_tuple(full_name, t=string_types) alltensors = [t for t in alltensors if any((n == t.name or n.split(':')[0] == t.name.split(':')[0]) for n in full_name)] return alltensors
def create_feeder(self, data, recipes, indices=None, batch_filter=None, batch_mode='batch', name=None, override=False): """ Parameters ---------- data: list of str list of name for all data used, the order of this list is the order of returned data. recipes: list or single odin.fuel.FeederRecipe the list of recipes defining the rule of transforming the data indices: None, string, dict, list list of (name, (start, end)) for iterating over files in Feeder batch_filter: call-able must be a function has take a list of np.ndarray as first arguments ([X]) or ([X, y]), you can return None to ignore given batch, return the data for accepting the batch batch_mode: 'batch' or 'file' (string type) 'batch' mode return shuffling and return everything in small batches 'file' mode return [(file_name, order_index, data...), ...] name: None, or string if name is provided, the feeder information will be saved, which include the `indices`, `recipes` Note ---- by defaults, the Feeder is created using only 1 CPU with `buffer_size=1` using the method `set_multiprocessing(ncpu=None, buffer_size=None, maximum_queue_size=None)` for changing this information. """ from odin.fuel.feeder import Feeder, IndexedData # check data data = [self.__getitem__(dat) if is_string(dat) else as_data(dat) for dat in as_tuple(data)] # check recipes if is_string(recipes): recipes = self._saved_recipes[recipes] else: recipes = as_tuple(recipes, t=FeederRecipe) # check indices if indices is None: indices = self.__getitem__('indices') elif is_string(indices): indices = self._saved_indices[indices] elif isinstance(indices, (Mapping, tuple, list, np.ndarray)): pass # ====== saving recipes and indices, if name is not None ====== # if is_string(name): if name not in self._saved_indices or override: self.add_indices(indices, name, override=True) if name not in self._saved_recipes or override: self.add_recipes(recipes, name, override=True) # ====== create Feeder ====== # feeder = Feeder(IndexedData(data=data, indices=indices), batch_filter=batch_filter, batch_mode=batch_mode, ncpu=1, buffer_size=1) return feeder.set_recipes(recipes)
def _mi_loss( self, Q: Sequence[Distribution], py_z: Distribution, training: Optional[bool] = None, which_latents_sampling: Optional[List[int]] = None, ) -> Tuple[tf.Tensor, List[tf.Tensor]]: ## sample the prior batch_shape = Q[0].batch_shape_tensor() if which_latents_sampling is None: which_latents_sampling = list(range(len(Q))) z_prime = [ q.KL_divergence.prior.sample(batch_shape) if i in which_latents_sampling else tf.stop_gradient( tf.convert_to_tensor(q)) for i, q in enumerate(Q) ] if len(z_prime) == 1: z_prime = z_prime[0] ## decoding px = self.decode(z_prime, training=training)[0] if px.reparameterization_type == NOT_REPARAMETERIZED: x = px.mean() else: x = tf.convert_to_tensor(px) # should not stop gradient here, generator need to be updated # x = tf.stop_gradient(x) Q_prime = self.encode(x, training=training) qy_z = self.predict_factors(latents=Q_prime, training=training) ## y ~ p(y|z), stop gradient here is important to prevent the encoder # updated twice this significantly increase the stability, otherwise, # encoder and latents often get NaNs gradients if self.reverse_mi: # D_kl(p(y|z)||q(y|z)) y_samples = tf.stop_gradient(py_z.sample()) Dkl = py_z.log_prob(y_samples) - qy_z.log_prob(y_samples) else: # D_kl(q(y|z)||p(y|z)) y_samples = tf.stop_gradient(qy_z.sample()) Dkl = qy_z.log_prob(y_samples) - py_z.log_prob(y_samples) ## only calculate MI for unsupervised data mi_y = tf.reduce_mean(Dkl) ## mutual information (we want to maximize this, hence, add it to the llk) if training: mi_y = tf.cond( self.step >= self.steps_without_mi, true_fn=lambda: mi_y, false_fn=lambda: tf.stop_gradient(mi_y), ) else: mi_y = tf.stop_gradient(mi_y) mi_y = self.mi_coef * mi_y ## this value is just for monitoring mi_z = [] for q, z in zip(as_tuple(Q_prime), as_tuple(z_prime)): mi = tf.reduce_mean(tf.stop_gradient(q.log_prob(z))) mi = tf.cond(tf.math.is_nan(mi), true_fn=lambda: 0., false_fn=lambda: tf.clip_by_value(mi, -1e8, 1e8)) mi_z.append(mi) return mi_y, mi_z
def elbo_components(self, inputs, training=None, mask=None, **kwargs): llk, kl = super().elbo_components(inputs, mask=mask, training=training) px_z, qz_x = self.last_outputs # repeat for each latent for layer, qz in zip(as_tuple(self.latents), as_tuple(qz_x)): # div(qZ||pZ) info_div = self.divergence(qz, qz.KL_divergence.prior) kl[f'div_{layer.name}'] = (self.lamda - self.beta) * info_div return llk, kl
def elbo_components(self, inputs, training=None, mask=None): llk, kl = super().elbo_components(inputs, mask=mask, training=training) px_z, qz_x = self.last_outputs for z, qz in zip(as_tuple(self.latents), as_tuple(qz_x)): dip = disentangled_inferred_prior_loss(qz, only_mean=self.only_mean, lambda_offdiag=self.lambda_offdiag, lambda_diag=self.lambda_diag) kl[f'dip_{z.name}'] = dip return llk, kl
def __init__(self, pool_size=2, strides=None, dilation=1, pad='valid', mode='max', transpose_mode='nn', **kwargs): super(Pool, self).__init__(**kwargs) self.pool_size = as_tuple(pool_size, t=int) self.strides = self.pool_size if strides is None \ else as_tuple(strides, t=int) self.dilation = (1,) if dilation is None else as_tuple(dilation, t=int) self.pad = pad.upper() if is_string(pad) else as_tuple(pad, t=int) self.mode = mode.upper() self.transpose_mode = transpose_mode
def save_figs(args: Arguments,
              name: str,
              figs: Optional[Sequence[plt.Figure]] = None):
  path = get_results_path(args)
  multi_figs = True
  if figs is not None and len(as_tuple(figs)) == 1:
    multi_figs = False
    figs = as_tuple(figs)
  path = f'{path}/{name}.{"pdf" if multi_figs else "png"}'
  vs.plot_save(path, figs, dpi=args.dpi, verbose=True)
def get_all_variables(scope=None, name=None, full_name=None, graph_keys=[tf.GraphKeys.GLOBAL_VARIABLES, tf.GraphKeys.LOCAL_VARIABLES, tf.GraphKeys.MODEL_VARIABLES, tf.GraphKeys.TRAINABLE_VARIABLES], graph=None, beginning_scope=True): """ Parameters ---------- scope: {str, None} scope name which the Variables have been created name: str name of tensor (WITHOUT variable scope) full_name: str name of tensor WITH variable scope. beginning_scope : bool (default: True) if True, the provide scope must be the beginning scope, otherwise, it could be in the middle of multiple scopes """ var = [] # ====== first get all available variable ====== # for k in graph_keys: if graph is None: var += [i for i in tf.get_collection(k) if isinstance(i, tf.Variable)] else: var += [i for i in graph.get_collection(k) if isinstance(i, tf.Variable)] var = list(set(var)) # filtering: start from general to detail # ====== filter by scope ====== # if scope is not None: scope = str(scope) if len(scope) == 0: var = [v for v in var if '/' not in v.name] else: scope_name_pattern = _TF_SCOPE_PATTERN(scope, beginning_scope) var = [v for v in var if len(scope_name_pattern.findall(v.name))] # ====== filter by name ====== # if name is not None: name = as_tuple(name, t=string_types) var = [v for v in var if any((v.name.split('/')[-1] == n or v.name.split('/')[-1].split(':')[0] == n.split(':')[0]) for n in name)] # ====== filter by fullname ====== # if full_name is not None: full_name = as_tuple(full_name, t=string_types) var = [v for v in var if any((n == v.name or n.split(':')[0] == v.name.split(':')[0]) for n in full_name)] return var
def callback(): losses = get_current_trainer().valid_loss if losses[-1] <= np.min(losses): vae.save_weights(overwrite=True) # reconstruction px, _ = vae(x_samples, training=True) image_reconstructed = to_image(as_tuple(px)[0].mean().numpy(), grids=(4, 4)) # latent traverse vp = VariationalPosterior(model=vae, inputs=x_samples, groundtruth=GroundTruth(y_samples), n_samples=1000) # stats mean = tf.reduce_mean(vp.latents.mean(), axis=0) std = tf.reduce_mean(vp.latents.stddev(), axis=0) w_d = tf.reduce_sum(vae.decoder.trainable_variables[0], axis=-1) image_latents = plot_latent_units(mean, std, w_d) # show traverse image images = np.concatenate([ vp.traverse(i, min_val=-2, max_val=2, num=21, n_samples=1, mode='linear').outputs[0].mean().numpy() for i in np.argsort(std)[:20] ]) image_traverse = to_image(images, grids=(20, int(images.shape[0] / 20))) # show sampled image px = as_tuple(vae.decode(z_samples, training=False)) image_sampled = to_image(px[0].mean().numpy(), grids=(4, 4)) # gradients all_grads = [(k, v) for k, v in vae.last_metrics.items() if 'grad/' in k] encoder_grad = 0 decoder_grad = 0 latents_grad = 0 if len(all_grads) > 0: encoder_grad = sum(v for k, v in all_grads if 'Encoder' in k) decoder_grad = sum(v for k, v in all_grads if 'Decoder' in k) latents_grad = sum(v for k, v in all_grads if 'Latents' in k) # return return dict(mean=mean, std=std, w_decode=w_d, encoder_grad=encoder_grad, decoder_grad=decoder_grad, latents_grad=latents_grad, noise_units=np.sum(std > 0.9), reconstructed=image_reconstructed, traverse=image_traverse, sampled=image_sampled, latents=image_latents)
def __init__(self, data): for d in as_tuple(data): if not isinstance(d, (np.ndarray, Data)): raise ValueError('`data` can only be instance of numpy.ndarray or ' 'odin.fuel.data.Data, but given type: %s' % str(type(d))) data = [NdarrayData(d) if isinstance(d, np.ndarray) else d for d in as_tuple(data)] if len(set(len(d) for d in data)) > 1: raise ValueError("All data in given data list must have the same length, " "but given: %s" % str([len(d) for d in data])) super(DataGroup, self).__init__(data, read_only=True)
def max(self, axis=None):
  if axis is None or 0 in as_tuple(axis, t=int):
    y = self.apply(
        f=lambda x: [i.max(axis=axis) for i in as_tuple(x)],
        f_merge=lambda x: [np.max([j[i] for j in x], axis=0)
                           for i in range(len(x[0]))])
  else:
    y = [i.max(axis=axis) for i in self._data]
  return y if self.is_data_list else y[0]
def mean(self, axis=None):
  sum1 = as_tuple(self.sum(axis))
  results = []
  for s1, dat in zip(sum1, self._data):
    shape = dat.shape
    if axis is None:
      n = np.prod(shape)
    else:
      n = np.prod([shape[i] for i in as_tuple(axis, t=int)])
    results.append(s1 / n)
  return results if self.is_data_list else results[0]
def sum2(self, axis=None):
  if axis is None or 0 in as_tuple(axis, t=int):
    y = self.apply(
        f=lambda x: [i.__pow__(2).sum(axis=axis) for i in as_tuple(x)],
        f_merge=lambda x: [sum(j[i] for j in x) for i in range(len(x[0]))])
  else:
    y = [i.__pow__(2).sum(axis=axis) for i in self._data]
  return y if self.is_data_list else y[0]
def initialize(self): # reversed to height width for easy processing if self.size is not None: self.size = as_tuple(self.size, N=2, t=int) segments = self.segments video_ext = as_tuple('' if self.video_ext is None else self.video_ext, 1, str) # ====== load jobs ====== # if isinstance(segments, str): if not os.path.exists(segments): raise ValueError('Path to segments must exists, however, ' 'exist(segments)={}'.format( os.path.exists(segments))) if os.path.isdir(segments): file_list = get_all_files(segments) file_list = [(os.path.basename(i), i, 0.0, -1.0) for i in file_list] # segment, path, start, end else: # csv file file_list = np.genfromtxt(segments, dtype=str, delimiter=' ') elif isinstance(segments, (tuple, list)): if isinstance(segments[0], str): # just a list of path to file file_list = [(os.path.basename(i), os.path.abspath(i), 0.0, -1.0) for i in segments] elif isinstance(segments[0], (tuple, list)): if len(segments[0]) != 4: raise Exception( 'segments must contain information in following for:' '[name] [path] [start] [end]') file_list = segments # filter using support audio extension file_list = [ f for f in file_list if any(ext in f[1] for ext in video_ext) ] # convert into: audio_path -> segment(name, start, end, channel) self.jobs = defaultdict(list) names = [] for segment, file, start, end in file_list: self.jobs[file].append((segment, float(start), float(end))) names.append(segment) self.jobs = sorted(self.jobs.items(), key=lambda x: x[0]) # ====== load bounding box ====== # if self.boundingbox is not None: if not isinstance(self.boundingbox, dict): raise ValueError('Bounding box must be a dictionary') if set(names) != set(self.boundingbox.keys()): raise Exception( 'Segments names and boundingbox keys mismatch.') # ====== check output ====== # self.dataset = Dataset(self.output) self._temp_path = get_tempdir() print('Temporary dir created at:', self._temp_path) # remove old cache files for p in os.listdir(self._temp_path): os.remove(os.path.join(self._temp_path, p))
def decode(self, latents, training=None, mask=None, **kwargs): py_z = self.predict_factors(latents=latents, training=training, mask=mask) # if labeled data is provided, use them in p(x|y,z) y = tf.stop_gradient(tf.convert_to_tensor(py_z)) h = tf.concat(as_tuple(latents) + (y, ), axis=-1) px_z = super(AnnealingVAE, self).decode(h, training=training, mask=mask, **kwargs) return as_tuple(px_z) + (py_z, )
def var(self, axis=None):
  sum1 = as_tuple(self.sum(axis))
  sum2 = as_tuple(self.sum2(axis))
  results = []
  for s1, s2, dat in zip(sum1, sum2, self._data):
    shape = dat.shape
    if axis is None:
      n = np.prod(shape)
    else:
      n = np.prod([shape[i] for i in as_tuple(axis, t=int)])
    results.append((s2 - np.power(s1, 2) / n) / n)
  return results if self.is_data_list else results[0]
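# mean/var above are recovered from running first and second moments, so the
# statistics can be accumulated chunk-by-chunk over out-of-core data. A quick
# NumPy sanity check of the same identity, independent of the Data class:
import numpy as np

x = np.random.RandomState(0).randn(1000, 8)
sum1 = x.sum(axis=0)             # running first moment
sum2 = (x ** 2).sum(axis=0)      # running second moment
n = x.shape[0]

mean = sum1 / n
var = (sum2 - sum1 ** 2 / n) / n          # biased (population) variance
print(np.allclose(mean, x.mean(axis=0)))  # True
print(np.allclose(var, x.var(axis=0)))    # True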
def add_roles(variables, roles): r"""Add a role to a given variable. Parameters ---------- var : :class:`~tensor.TensorVariable` The variable to assign the new role to. roles : :subclass:`Role` this roles will be concatenated with current roles scope. Notes ----- Some roles are subroles of others (e.g. :class:`Weight` is a subrole of :class:`Parameter`). This function will not add a role if a more specific role has already been added. If you need to replace a role with a parent role (e.g. replace :class:`Weight` with :class:`Parameter`) you must do so manually. """ if roles is None: return variables roles = tuple([name_to_roles(r) for r in as_tuple(roles)]) # create tag attribute for variable for var in as_tuple(variables): # append roles scope var_roles = get_roles(var, return_string=False) + \ roles + \ get_current_role_scope() # ====== handle string roles first ====== # _ = [] for r in var_roles: if isinstance(r, string_types): _add_to_collection_no_duplication(r, var) elif isinstance(r, type) and issubclass(r, Role): _.append(r) var_roles = _ # ====== shrink the roles so there is NO subrole ====== # new_roles = [] for r in var_roles: # issubclass(r0=LearningRate, r=OptimizerVariable) = True # hence, remove var from `r` collection if any(r != r0 and issubclass(r0, r) for r0 in var_roles): r_collection = tf.get_collection_ref(r.__name__) if var in r_collection: r_collection.remove(var) else: new_roles.append(r) # ====== adding new role ====== # for r in new_roles: _add_to_collection_no_duplication(r.__name__, var) return variables
def __init__(self, data_desc, dtype=None,
             batch_filter=None, batch_mode='batch',
             ncpu=1, buffer_size=8, hwm=86,
             mpi_backend='python'):
  super(Feeder, self).__init__(data=as_tuple(data_desc, t=IndexedData),
                               read_only=True)
  # find intersection of all indices in IndexedData
  # NOTE: the asynchronous helper `async` must be renamed on Python >= 3.7,
  # where `async` is a reserved keyword
  self._indices_keys = async(
      lambda: np.array(
          list(set.intersection(*[set(dat.indices.keys())
                                  for dat in self._data])),
          dtype=str))()
  # ====== desire dtype ====== #
  nb_data = sum(len(dat._data) for dat in self._data)
  self._output_dtype = as_tuple(dtype, N=nb_data)
  # ====== Set default recipes ====== #
  self._recipes = RecipeList()
  self._recipes.set_feeder_info(nb_desc=len(self._data))
  self.set_multiprocessing(ncpu, buffer_size, hwm, mpi_backend)
  # ====== cache shape information ====== #
  # store first dimension
  self._cache_shape = None
  # if the recipes changed, the shape needs to be recalculated
  self._recipes_changed = False
  # ====== Iteration information ====== #
  self._running_iter = []
  # ====== batch mode ====== #
  if batch_filter is None:
    batch_filter = _dummy_batch_filter
  elif not hasattr(batch_filter, '__call__'):
    raise ValueError('batch_filter must be a function with 1 or 2 '
                     'parameters: (X) or (X, y).')
  # check if batch_filter is picklable
  try:
    cPickle.dumps(batch_filter, protocol=2)
  except Exception:
    raise ValueError("`batch_filter` must be pickle-able, i.e. it must be a "
                     "top-level function.")
  self._batch_filter = batch_filter
  # check batch_mode
  batch_mode = str(batch_mode).lower()
  if batch_mode not in ("batch", 'file'):
    raise ValueError("Only support `batch_mode`: 'file'; 'batch', but "
                     "given value: '%s'" % batch_mode)
  self._batch_mode = batch_mode
def __init__(self, input_name, output_name=None, width=9, order=(0, 1), axis=0): super(DeltaExtractor, self).__init__( input_name=as_tuple(input_name, t=string_types), output_name=output_name) # ====== check width ====== # width = int(width) if width % 2 == 0 or width < 3: raise ValueError("`width` must be odd integer >= 3, give value: %d" % width) self.width = width # ====== check order ====== # self.order = as_tuple(order, t=int) # ====== axis ====== # self.axis = axis
def _initialize(self, x): input_shape = K.get_shape(x) # ====== validate init arguments ====== # ndim = len(input_shape) - 2 self.ndim = ndim # padding if isinstance(self.pad, (tuple, list, int)): self.pad = as_tuple(self.pad, ndim, int) elif self.pad is None: self.pad = (0, ) * ndim # strides if self.strides is None: self.strides = (0, ) * ndim else: self.strides = as_tuple(self.strides, ndim, int) # dilation if self.dilation is None: self.dilation = (1, ) * ndim else: self.dilation = as_tuple(self.dilation, ndim, int) # filter size self.filter_size = as_tuple(self.filter_size, ndim, int) # ====== create config ====== # config = NNConfig(input_shape=input_shape) # TF kernel shape: (kernel_dim1, kernel_dim2, ..., input_depth, out_depth) kernel_shape = self.filter_size + (input_shape[-1], self.num_filters) # weights config.create_params(self.W_init, shape=kernel_shape, name='W', nnops=self, roles=WEIGHT) if self.b_init is not None: if self.untie_biases: output_shape = get_conv_output_shape( input_shape, kernel_shape, border_mode=self.pad, subsample=self.strides, filter_dilation=self.dilation) biases_shape = output_shape[1:] else: biases_shape = (self.num_filters, ) config.create_params(self.b_init, shape=biases_shape, name='b', nnops=self, roles=BIAS) return config
def _apply(self, X, **kwargs): X = as_tuple(X, N=len(self.ops)) results = [ op(x, **_shrink_kwargs(op, kwargs)) for x, op in zip(X, self.ops) ] if callable(self.merge_function): output = self.merge_function(results) for i in as_tuple(output): if not isinstance(K.get_shape(i), tuple): raise Exception( 'returned output from merge_function lost shape ' 'information.') return output else: return results
def __init__(self,
             distributions: Sequence[Distribution],
             axis: Axis = 0,
             validate_args: bool = False,
             name: Optional[str] = None,
             **kwargs):
  parameters = dict(locals())
  distributions = as_tuple(distributions)
  # validate distribution types
  dist_types = set([type(d) for d in distributions])
  assert len(dist_types) == 1, \
    f'Can only concatenate distributions of a single type, but given: {dist_types}'
  # validate shape information
  shape_info = [(d.batch_shape, d.event_shape) for d in distributions]
  batch_ref = shape_info[0][0]
  event_ref = shape_info[0][1]
  for batch, event in shape_info:
    tf.assert_equal(
        batch.ndims, batch_ref.ndims,
        f"Rank of batch shapes mismatch {batch.ndims} != {batch_ref.ndims}")
    tf.assert_equal(event, event_ref,
                    f"Event shapes mismatch {event} != {event_ref}")
  self._distributions = distributions
  self._batch_ndims = batch_ref.ndims
  self._axis = int(axis) % self._batch_ndims
  super(Batchwise, self).__init__(
      dtype=self._distributions[0].dtype,
      reparameterization_type=self._distributions[0].reparameterization_type,
      validate_args=validate_args,
      allow_nan_stats=self._distributions[0].allow_nan_stats,
      parameters=parameters,
      name=name)
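# The class above concatenates several distributions along a batch axis; the
# intuition is that samples and log-probabilities simply stack batch-wise.
# A minimal sketch with tensorflow_probability (not using Batchwise itself):
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
d1 = tfd.Normal(loc=tf.zeros([3, 2]), scale=tf.ones([3, 2]))  # batch (3, 2)
d2 = tfd.Normal(loc=tf.ones([5, 2]), scale=tf.ones([5, 2]))   # batch (5, 2)

x = tf.concat([d1.sample(), d2.sample()], axis=0)             # batch (8, 2)
log_p = tf.concat([d1.log_prob(x[:3]), d2.log_prob(x[3:])], axis=0)
print(x.shape, log_p.shape)  # (8, 2) (8, 2)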
def initialize_all_variables(vars=None): """ This function will automatically check if the variables are initialized, and only perform initialization for un-initialized variables. Note ---- Re-initialize an initialized variable will give it random values """ if vars is None: vars = get_all_variables() else: vars = [v for v in as_tuple(vars) if is_variable(v)] # ====== check if variable not initialized ====== # init_info = eval([tf.is_variable_initialized(v) for v in vars]) vars = [v for v, inited in zip(vars, init_info) if not inited] # ====== build mapping graph -> list of vars ====== # graph = defaultdict(list) for v in vars: graph[v.graph].append(v) # ====== run the initialization ====== # for g, v in graph.items(): get_session(graph=g).run([i.initializer for i in v])
def __init__(self, input_name=None, shrink_mode='right'): super(EqualizeShape0, self).__init__( input_name=as_tuple(input_name, t=string_types) if input_name is not None else None) shrink_mode = str(shrink_mode).lower() if shrink_mode not in ('center', 'left', 'right'): raise ValueError("shrink mode support include: center, left, right") self.shrink_mode = shrink_mode
def function(inputs, outputs, updates=[], defaults={}, training=None, batch_size=None, batch_vars=[]): """ Parameters ---------- inputs: list of `tf.placeholder` or `tf.Variable` outputs: list of `tf.Tensor` updates: list, or dict mapping from `Tensor` to its new value which is `Tensor` or real value. defaults: dict mapping from `Variable` or `placeholder` to its default values. training: None, True, False if `training=None`, left the training mode unchanged if `training=True`, turn on training mode only when execute this function. if `training=False`, disable training mode only when execute this function. batch_size : {int, None} (default: None) if `batch_size` is not None, auto-split all array into minibatch, and return a list of outputs (all array must have equal `shape[0]`) batch_vars : {Tensor, list of Tensor} if `len(batch_vars) == 0`, split mini-batches for all Tensor inputs, otherwise, only applying for a selected set of inputs. """ # ====== check inputs ====== # if inputs is None or len(as_tuple(inputs)) == 0: inputs = ComputationGraph(outputs).placeholders inputs_text = ', '.join([str(i) for i in inputs]) print("[WARNING] inputs haven't specified, auto-inferred from Graph of " "outputs, graph inputs: %s" % 'None' if len(inputs) == 0 else inputs_text) return Function(inputs=inputs, outputs=outputs, updates=updates, defaults=defaults, training=training, batch_size=batch_size)
def save_variables(var_list, path, session=None): """ This function only apply for trainable parameters """ if session is None: session = get_session() var_list = [v for v in set(as_tuple(var_list)) if is_variable(v)] name = '|'.join(sorted([v.name for v in var_list])) if name in _saver: saver = _saver[name] else: saver = tf.train.Saver(var_list=var_list, restore_sequentially=False, allow_empty=False) # ====== save the variables ====== # checkpoint = saver.save(session, path, global_step=None, write_meta_graph=False, write_state=False) # ====== save meta-info for recreate variable ====== # var_meta = [] for v in var_list: name = v.name.split(':')[0] dtype = v.dtype.base_dtype.name shape = v.shape.as_list() var_meta.append((name, dtype, shape)) # ====== save the collections ====== # collections = {var.name: role.get_roles(var, return_string=True) for var in var_list} with open(path + '.collections', 'wb') as f: cPickle.dump([collections, var_meta], f, protocol=cPickle.HIGHEST_PROTOCOL) return checkpoint
def add_recipes(self, recipes, name, override=False):
  """
  Parameters
  ----------
  recipes : FeederRecipe, RecipeList, or list of FeederRecipe
    the recipes to be stored
  name : str
    name under which the recipes are saved
  override : bool
    if True, override any previously saved recipes with the same name
  """
  # ====== validate arguments ====== #
  if not is_string(name):
    raise ValueError("`name` must be string, but given: %s" % str(type(name)))
  if name in self._saved_recipes and not override:
    raise ValueError("Cannot override pre-defined RECIPE with name: '%s'" % name)
  # ====== validate recipes list ====== #
  if isinstance(recipes, RecipeList):
    recipes = tuple(recipes._recipes)
  else:
    tmp = []
    for rcp in as_tuple(recipes, t=FeederRecipe):
      if isinstance(rcp, RecipeList):
        tmp += list(rcp._recipes)
      else:
        tmp.append(rcp)
    recipes = tuple(tmp)
  # ====== store the recipes to disk ====== #
  path = os.path.join(self.recipe_path, name)
  with open(path, 'wb') as f:
    cPickle.dump(recipes, f, protocol=cPickle.HIGHEST_PROTOCOL)
  # ====== update local recipes list ====== #
  self._saved_recipes[name] = recipes
  return self
def __init__(self, n_classes: int, event_shape: List[int], name: str = 'RepetitionEmbedding'): super().__init__(name=name) self.n_classes = int(n_classes) self._event_shape = as_tuple(event_shape, t=int)
def classification_report(y_pred, y_true, labels):
  """
  Parameters
  ----------
  y_pred : array
    predicted (integer-coded) labels
  y_true : array
    ground-truth (integer-coded) labels
  labels : list
    list of label names (or values); its order defines the label indices

  Returns
  -------
  Classification report in form of string
  """
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
  # ====== validate labels ====== #
  labels = as_tuple(labels)
  target_names = [str(i) for i in labels]
  labels = list(range(0, len(labels)))
  # ====== create report ====== #
  s = ""
  s += "Accuracy: %f\n" % accuracy_score(y_true, y_pred, normalize=True)
  s += "Confusion matrix:\n"
  s += str(confusion_matrix(y_true, y_pred, labels=labels)) + '\n'
  s += "Report:\n"
  s += str(classification_report(y_true, y_pred, labels=labels, digits=3,
                                 target_names=target_names))
  return s
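# A hedged usage sketch with toy integer-coded predictions, assuming the
# helper above (and its `as_tuple` dependency) is importable; the label names
# are illustrative and map index 0 -> 'cat', 1 -> 'dog', 2 -> 'bird':
import numpy as np

y_true = np.array([0, 1, 2, 2, 1, 0, 0])
y_pred = np.array([0, 1, 2, 1, 1, 0, 2])
print(classification_report(y_pred=y_pred, y_true=y_true,
                            labels=['cat', 'dog', 'bird']))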
def __init__(self, converter, input_name='name', output_name='name'): super(Converter, self).__init__(input_name=as_tuple(input_name, t=string_types), output_name=str(output_name)) # ====== check converter ====== # if not hasattr(converter, '__call__') and \ not isinstance(converter, Mapping): raise ValueError("`converter` must be call-able.") # converter can be function or dictionary self.converter = converter
def __init__(self, dtype, input_name=None, exclude_pattern=r".+\_sum[1|2]"):
  super(AsType, self).__init__(
      input_name=as_tuple(input_name, t=string_types)
      if input_name is not None else None)
  self.dtype = np.dtype(dtype)
  if isinstance(exclude_pattern, string_types):
    exclude_pattern = re.compile(exclude_pattern)
  else:
    exclude_pattern = None
  self.exclude_pattern = exclude_pattern
def set_recipes(self, *recipes): # filter out None value recipes = flatten_list(as_tuple(recipes)) recipes = [rcp for rcp in recipes if rcp is not None and isinstance(rcp, FeederRecipe)] # ====== set the new recipes ====== # if len(recipes) > 0: self._recipes = recipes for rcp in self._recipes: rcp.set_feeder_info(self.nb_desc) return self
def _preprocessing_losses(losses, y_true, y_pred, inherit_losses=None, sample_weights=None): """ Can be used for both objectives and metrics """ from odin import backend as K # ====== special cases, only one inputs outputs, and multiple loss ====== # nb_losses = len(losses) if len(y_true) == 0: y_true = [None] * nb_losses elif len(y_true) == 1: y_true = y_true * nb_losses if len(y_pred) == 0: y_pred = [None] * nb_losses elif len(y_pred) == 1: y_pred = y_pred * nb_losses # ====== applying ====== # cost = [] for idx, fn in enumerate(as_tuple(losses)): weight = 1 kwargs = {} # preprocess if isinstance(fn, (tuple, list)): if len(fn) == 1: fn = fn[0] else: weight = [i for i in fn if is_number(i)] weight = 1 if len(weight) == 0 else weight[0] kwargs = [i for i in fn if isinstance(i, Mapping)] kwargs = {} if len(kwargs) == 0 else kwargs[0] fn = [i for i in fn if i != weight and i != kwargs][0] # apply the loss if is_number(fn): if inherit_losses is None or fn >= len(inherit_losses): raise ValueError("Cannot find losses at index: '%d'" % fn) obj = inherit_losses[fn] elif K.is_tensor(fn): obj = fn elif hasattr(fn, '__call__'): try: sign = inspect.signature(fn) if 'weights' in sign.parameters and sample_weights is not None: kwargs['weights'] = sample_weights except ValueError: pass finally: obj = fn(y_true[idx], y_pred[idx], **kwargs) if isinstance(obj, (tuple, list)): wprint("function: '%s' return %d outputs (%s), only pick the first one" % (fn.__name__, len(obj), '; '.join([str(i) for i in obj]))) obj = obj[0] cost.append((weight, obj)) # ====== reduce ====== # return [c if w == 1 else w * c for w, c in cost]
def initialize(self, X, y=None): if isinstance(X, (tuple, list)): X = np.array(X) elif not isinstance(X, np.ndarray): X = X[:] if isinstance(y, (tuple, list)): y = np.array(y) elif y is not None and not isinstance(y, np.ndarray): y = y[:] # ====== check dimensions ====== # feat_dim = X.shape[1] if self._feat_dim is None: self._feat_dim = feat_dim if y is not None: classes = np.unique(y) if self._labels is None: self._labels = classes else: classes = self.labels # ====== exception ====== # if self.feat_dim != feat_dim: raise ValueError("Initialized with `feat_dim`=%d, given data with %d " "dimensions" % (self.feat_dim, feat_dim)) if self.nb_classes != len(classes): raise ValueError("Initialized with `nb_classes`=%d, given data with %d " "classes" % (self.nb_classes, len(classes))) # ====== normalizing ====== # if not self._normalizer.is_fitted: self._normalizer.fit(X, y) X = self._normalizer.transform(X) # ====== initialize GMMs ====== # if self._gmm is None: if self._strategy == 'ova': self._gmm = [] rand = np.random.RandomState(seed=self._seed) for n_components in as_tuple(self._n_components, t=int, N=self.nb_classes): gmm = GaussianMixture(n_components=n_components, covariance_type=self._covariance_type, max_iter=self._max_iter, n_init=self._n_init, init_params=self._init_params, random_state=rand.randint(0, 10e8)) self._gmm.append(gmm) elif self._strategy == 'all': gmm = GaussianMixture(n_components=self.nb_classes, covariance_type=self._covariance_type, max_iter=self._max_iter, n_init=self._n_init, init_params=self._init_params, means_init=np.array([X[y == clz].mean(axis=0) for clz in np.unique(y)]), random_state=self._seed) self._gmm = gmm else: raise ValueError("No support for `strategy`=%s" % self._strategy) # ====== return ====== # if y is None: return X return X, y
def set_callbacks(self, callbacks): if callbacks is None: callbacks = [] elif isinstance(callbacks, CallbackList): callbacks = callbacks._callbacks else: callbacks = as_tuple(callbacks, t=lambda x: isinstance(x, (Callback, type(None)))) callbacks = [i for i in callbacks if i is not None] self._callbacks = [i for i in set(callbacks)] return self
def get_loaded_param(self, name): ds = self.__class__.load_parameters() if is_string(name): return_1_param = True else: return_1_param = False name = as_tuple(name, t=str) if any(n not in ds for n in name): raise RuntimeError("Cannot find parameter with name:'%s' from loaded " "dataset at path: '%s'" % (name, ds.path)) params = [ds[n][:] for n in name] return params[0] if return_1_param else tuple(params)
def __init__(self, data, axis=-1):
  data = as_tuple(data)
  if len(data) < 2:
    raise ValueError("2 or more Data must be given to `DataConcat`")
  if axis == 0:
    raise ValueError("Cannot concatenate axis=0")
  # all Data must share the same rank and the same first dimension
  if len(set(d.ndim for d in data)) > 1:
    raise ValueError("All Data must have the same number of dimension (i.e. `ndim`)")
  if len(set(d.shape[0] for d in data)) > 1:
    raise ValueError("All Data must have the same length (i.e. first dimension)")
  super(DataConcat, self).__init__(data, read_only=True)
  self._is_data_list = False
  self._axis = axis_normalize(int(axis), ndim=data[0].ndim)
def _initialize(self, X, y=None): with tf.name_scope(self.name): # ====== input_shape ====== # if self._input_shape is None: self._input_shape = X.shape elif self.input_shape[1:] != X.shape[1:]: raise ValueError("Initialized with input shape: %s, given tensor with shape: %s" % (self.input_shape, X.shape)) # ====== output_shape ====== # if self._output_shape is None: self._output_shape = y.shape elif self.output_shape[1:] != y.shape[1:]: raise ValueError("Initialized with output shape: %s, given tensor with shape: %s" % (self.output_shape, y.shape)) # ====== placeholder ====== # self._X = K.placeholder(shape=self.input_shape, dtype=self.dtype, name='input') self._y = K.placeholder(shape=self.output_shape, dtype=self.dtype, name='output') # ====== run the network ====== # y_pred_logits = self.network.apply(self._X) nb_classes = y_pred_logits.shape.as_list()[-1] if len(self._output_shape) == 1: y_true = tf.one_hot(indices=tf.cast(self._y, 'int32'), depth=nb_classes) elif self._output_shape[-1] != nb_classes: raise ValueError("Given %d classes, but output from network has %s classes" % (self._output_shape[-1], nb_classes)) self._nb_classes = nb_classes # ====== sigmoid or softmax ====== # if nb_classes == 2: fn_activation = tf.nn.sigmoid fn_loss = tf.losses.sigmoid_cross_entropy fn_acc = K.metrics.binary_accuracy else: fn_activation = tf.nn.softmax fn_loss = tf.losses.softmax_cross_entropy fn_acc = K.metrics.categorical_accuracy y_pred_proba = fn_activation(y_pred_logits) # ====== class weight ====== # class_weights = np.ones(shape=(nb_classes,), dtype=self.dtype) if self._class_weights is None\ else as_tuple(self._class_weights, N=self.nb_classes, t=float) class_weights = tf.constant(value=class_weights, dtype=self.dtype, name="class_weights") weights = tf.gather(class_weights, tf.cast(self._y, 'int32') if self.nb_classes == 2 else tf.argmax(self._y, axis=-1)) # ====== objectives ====== # cost_train = fn_loss(y_true, logits=y_pred_logits, weights=weights) exit()
def batch_end(self, task, batch_results): if self._task_name is not None and task.name != self._task_name: return # found any NaN values if self._detect_inf: fn = lambda x: np.logical_or(np.isinf(x), np.isnan(x)) else: fn = lambda x: np.isnan(x) if any(np.any(fn(r)) for r in as_tuple(batch_results)): signal = TrainSignal.ROLLBACK self._patience -= 1 if self._patience <= 0: # but if out of patience, stop signal = TrainSignal.STOP self.send_notification('Found NaN or Inf value, task:"%s"' % task.name) return signal
def __init__(self, nb_words=None, char_level=False, preprocessors=[TransPreprocessor(), CasePreprocessor(lower=True)], filters=None, stopwords=False, lemmatization=True, language='en', batch_size=2048, nb_processors=None, order='word', engine='odin', print_progress=True): # ====== internal states ====== # if engine not in ('spacy', 'odin'): raise ValueError('We only support 2 text processing engines: Spacy, or ODIN.') if order not in ('word', 'doc'): raise ValueError('The "order" argument must be "doc" or "word".') self.__engine = engine self.__order = order self.__longest_document = ['', 0] self.print_progress = print_progress # ====== dictionary info ====== # self._nb_words = nb_words self.nb_docs = 0 self.char_level = char_level self.language = language self._word_counts = defaultdict(int) # number of docs the word appeared self._word_docs = defaultdict(int) # actual dictionary used for embedding self._word_dictionary = OrderedDict() self._word_dictionary_info = OrderedDict() self.stopwords = stopwords self.lemmatization = lemmatization self.batch_size = batch_size self.nb_processors = (int(nb_processors) if nb_processors is not None else cpu_count()) # ====== filter and preprocessor ====== # self.filters = filters if filters is None else as_tuple(filters) if preprocessors is None: preprocessors = [] elif not isinstance(preprocessors, (tuple, list)): preprocessors = [preprocessors] self.preprocessors = preprocessors
def __init__(self, data, read_only): # batch information self._batch_size = 256 self._start = 0. self._end = 1. self._seed = None self._shuffle_level = 0 # ====== main data ====== # # object that have shape, dtype ... self._data = as_tuple(data) if isinstance(data, (tuple, list)): self._is_data_list = True else: self._is_data_list = False self._read_only = bool(read_only) # ====== special flags ====== # # to detect if cPickle called with protocol >= 2 self._new_args_called = False # flag show that array valued changed self._status = 0
def _process_noise_dim(input_shape, dims):
  """
  By default, each element is kept or dropped independently.  If `noise_shape`
  is specified, it must be
  [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
  to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]`
  will make independent decisions.  For example, if `shape(x) = [k, l, m, n]`
  and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be
  kept independently and each row and column will be kept or not kept together.

  Examples
  --------
  (None, 10, 10) with noise_dims=2
  => Noise mask: (None, 10, 1)
  """
  if dims is None:
    return input_shape
  ndims = input_shape.shape[0].value
  dims = [i % ndims for i in as_tuple(dims, t=int)]
  # ====== get noise shape ====== #
  return tuple([1 if i in dims else input_shape[i] for i in range(ndims)])
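# The same idea is exposed directly through the `noise_shape` argument of
# TensorFlow's dropout; a small standalone sketch (not using the helper above):
import tensorflow as tf

x = tf.ones([2, 10, 10])
# share the drop/keep decision across the last axis: columns go together
y = tf.nn.dropout(x, rate=0.5, noise_shape=[2, 10, 1], seed=1)
print(tf.reduce_all(y == y[:, :, :1]).numpy())  # True: each row dropped/kept whole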
def __init__(self, task_name, output_name, fn_reduce=lambda x: (np.mean(x) if isinstance(x[0], Number) else sum(i for i in x)), print_plot=False, save_path=None, repeat_freq=1, logging=True): super(EpochSummary, self).__init__(logging=logging) self._task_name = as_tuple(task_name, t=str) # ====== scheduling ====== # assert repeat_freq >= 1 self._repeat_freq = int(repeat_freq) self._count = self._repeat_freq * len(self._task_name) self._epoch_results = defaultdict(dict) # ====== output identity ====== # if not isinstance(output_name, (tuple, list, set)): output_name = (output_name,) output_name = [i if is_string(i) else i.name for i in output_name] self.output_name = tuple(output_name) self.fn_reduce = FuncDesc(func=fn_reduce) # ====== how to output ====== # self.print_plot = bool(print_plot) self.save_path = save_path