def _quant_var(
    name,
    initializer_val,
    vars_collection=tf.GraphKeys.MOVING_AVERAGE_VARIABLES,
):
    """Create a scalar variable holding one bound of a min/max quantization range.

    Args:
        name: Name given to the variable.
        initializer_val: Constant value the variable is initialized with.
        vars_collection: Collection key forwarded (as a one-element list) to
            `model_variable`'s `collections` argument.

    Returns:
        The non-trainable, shape-`[]` variable returned by
        `contrib_framework.model_variable`.
    """
    range_initializer = tf.constant_initializer(initializer_val)
    return contrib_framework.model_variable(
        name,
        shape=[],
        initializer=range_initializer,
        collections=[vars_collection],
        trainable=False)
def create_latent_space(batch_size, shape, steps=None):
    """Create a trainable latent space and replicate it for every batch element.

    A single `'LatentSpace'` model variable is created and tiled so that each
    element of the batch sees the same learned values.

    Args:
        batch_size: Number of copies of the latent space to produce.
        shape: Tuple giving the per-element latent shape (it is concatenated
            with other tuples, so a list is not accepted).
        steps: Optional number of steps. When given, a separate latent space
            is learned for each step.

    Returns:
        A tensor of shape `(batch_size,) + shape` when `steps` is None,
        otherwise `(steps, batch_size) + shape`.
    """
    per_step = steps is not None
    variable_shape = (steps,) + shape if per_step else shape

    latent = framework.model_variable(
        'LatentSpace',
        shape=variable_shape,
        trainable=True,
        initializer=tf.random_uniform_initializer(0.0, 1e-3))

    # Repeat the variable `batch_size` times along its leading axis, then fold
    # the repeats into a new leading batch axis.
    trailing_ones = (1,) * (len(variable_shape) - 1)
    latent = tf.tile(latent, (batch_size,) + trailing_ones)
    latent = tf.reshape(latent, (batch_size,) + variable_shape)

    if per_step:
        # Swap the batch and step axes; the latent axes keep their order.
        latent_axes = tuple(range(2, len(shape) + 2))
        latent = tf.transpose(latent, (1, 0) + latent_axes)

    return latent
def spatial_conv(inputs,
                 conv_type,
                 kernel,
                 filters,
                 stride,
                 is_training,
                 activation_fn='relu',
                 data_format='channels_last'):
    """Applies the spatial convolution or pooling selected by `conv_type`.

    For "std" and "depth" a 1x1 convolution is applied first; "1std" runs the
    kernel-sized standard convolution without that 1x1 step. When `kernel` is
    1 the inputs are returned untouched, and an unrecognised `conv_type` also
    leaves the inputs unchanged.

    Args:
      inputs: `Tensor` of size `[batch*time, height, width, channels]`. Only
        supports 'channels_last' as the data format.
      conv_type: `str`, one of "std", "1std", "depth", "maxpool" or "avgpool";
        selects the spatial conv/pooling method.
      kernel: `int` kernel size used for the conv / pooling op. Should be a
        positive integer.
      filters: `int` number of filters in the convolution.
      stride: `int` stride forwarded to the conv / pooling op (applied to both
        spatial axes in the depthwise branch).
      is_training: `bool` specifying whether in training mode or not.
      activation_fn: `str` activation to use; 'relu' uses ReLU, any other
        value applies `hard_swish` after each conv.
      data_format: `str`. Only supports 'channels_last' as the data format.

    Returns:
      A `Tensor` of the same data_format.
    """
    if kernel == 1:
        return inputs

    use_relu = (activation_fn == 'relu')

    def _activate(tensor):
        # With ReLU the activation is handled by the conv / batch-norm helper;
        # otherwise hard-swish is applied explicitly afterwards.
        return tensor if use_relu else hard_swish(tensor)

    if conv_type in ('std', 'depth'):
        # Pointwise (1x1) convolution ahead of the spatial operation.
        inputs = _activate(
            conv2d(inputs, 1, filters, 1, is_training, use_relu=use_relu))

    if conv_type in ('std', '1std'):
        return _activate(
            conv2d(inputs, int(kernel), filters, int(stride), is_training,
                   use_relu=use_relu))

    if conv_type == 'depth':
        depth_multiplier = 1
        side = int(kernel)
        depthwise_kernel = contrib_framework.model_variable(
            name='depthwise_kernel',
            shape=(side, side, inputs.shape[-1], depth_multiplier),
            dtype=tf.float32,
            initializer=contrib_layers.variance_scaling_initializer(
                factor=2.0, mode='FAN_IN', uniform=False),
            trainable=True)
        step = int(stride)
        conv_format = 'NHWC' if data_format == 'channels_last' else 'NCHW'
        inputs = tf.nn.depthwise_conv2d(
            inputs,
            tf.cast(depthwise_kernel, inputs.dtype),
            strides=[1, step, step, 1],
            padding='SAME',
            rate=[1, 1],
            data_format=conv_format)
        inputs = bn.batch_norm_relu(
            inputs, is_training, relu=use_relu, data_format=data_format)
        return _activate(inputs)

    if conv_type == 'maxpool':
        return tf.layers.max_pooling2d(
            inputs, int(kernel), int(stride), padding='same',
            data_format=data_format)

    if conv_type == 'avgpool':
        return tf.layers.average_pooling2d(
            inputs, int(kernel), int(stride), padding='same',
            data_format=data_format)

    return inputs
def matrix_pool_sparse(
        inputs,  # pool the tensor: input: N x M x K along two dimensions
        layer_params,
        verbose=1,
        scope=None,
        **kwargs):
    """Pool a sparse matrix along each axis and linearly project the results.

    Args:
        inputs: dict with keys
            'input' (values of the observed entries),
            'units' (feature size K of each entry),
            'mask_indices' (indices of the observed entries) and
            'shape' (the pair `[N, M]`).
        layer_params: dict of layer options. Only 'pool_mode' is read
            (default 'max'); 'mean' computes a masked average, any other value
            is forwarded to `sparse_reduce` as its `mode`.
        verbose: Unused; kept for interface compatibility.
        scope: Optional variable scope name (default name "matrix_sparse").
        **kwargs: Unused; kept for interface compatibility.

    Returns:
        dict with 'nvec' (N x 1 x units), 'mvec' (1 x M x units) and the
        pass-through entries 'mask_indices', 'units' and 'shape'.
    """
    inp_values = inputs['input']
    units_in = inputs['units']
    mask_indices = inputs['mask_indices']
    pool_mode = layer_params.get('pool_mode', 'max')  # max or average pooling
    shape = inputs['shape']
    N, M = shape
    # Added to the per-row / per-column counts so empty rows or columns do not
    # divide by zero in 'mean' mode.
    eps = tf.convert_to_tensor(1e-3, dtype=np.float32)

    with tf.variable_scope(scope, default_name="matrix_sparse"):
        theta_n = model_variable("theta_n",
                                 shape=[units_in, units_in],
                                 trainable=True,
                                 dtype=tf.float32)
        theta_m = model_variable("theta_m",
                                 shape=[units_in, units_in],
                                 trainable=True,
                                 dtype=tf.float32)

        # Compare by value: `is 'mean'` only matched when the string happened
        # to be the same interned object as the literal.
        if pool_mode == 'mean':
            norm_0 = sparse_marginalize_mask(
                mask_indices, shape=shape, axis=0, keep_dims=True) + eps
            norm_1 = sparse_marginalize_mask(
                mask_indices, shape=shape, axis=1, keep_dims=True) + eps
            nvec = sparse_reduce(mask_indices, inp_values, units_in,
                                 mode='sum', shape=shape, axis=1,
                                 keep_dims=True) / norm_1
            mvec = sparse_reduce(mask_indices, inp_values, units_in,
                                 mode='sum', shape=shape, axis=0,
                                 keep_dims=True) / norm_0
        else:
            nvec = sparse_reduce(mask_indices, inp_values, units_in,
                                 mode=pool_mode, shape=shape, axis=1,
                                 keep_dims=True)
            mvec = sparse_reduce(mask_indices, inp_values, units_in,
                                 mode=pool_mode, shape=shape, axis=0,
                                 keep_dims=True)

        nvec = tf.tensordot(nvec, theta_n, axes=1)
        nvec.set_shape([N, 1, units_in])  # because of current tensorflow bug!!
        mvec = tf.tensordot(mvec, theta_m, axes=1)
        mvec.set_shape([1, M, units_in])  # because of current tensorflow bug!!

    outdic = {
        'nvec': nvec,
        'mvec': mvec,
        'mask_indices': mask_indices,
        'units': units_in,
        'shape': shape
    }
    return outdic
def matrix_sparse(inputs, layer_params, reuse=None, scope=None, verbose=1, **kwargs):
    """Matrix layer on sparse input: projected entries plus pooled summaries.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm against the upstream formatting.

    Args:
        inputs: dict. Must contain 'shape' (the pair `[N, M]`) and 'units'
            (input feature size K), plus either 'input' or both 'nvec' and
            'mvec'. Optional keys read here: 'mask_indices', 'weights_row',
            'weights_col', 'weights_both' (mean pooling), and 'row' / 'col'
            (individual bias).
        layer_params: dict of layer options. Keys read: 'units',
            'kernel_initializer', 'regularizer', 'skip_connections',
            'pool_mode', 'individual_bias', 'activation',
            'attention_pooling'.
        reuse: Forwarded to `tf.variable_scope`.
        scope: Variable scope name; falls back to "matrix_sparse".
        verbose: When 1, prints the activation being applied.
        **kwargs: 'sizes' is read when 'individual_bias' is enabled.

    Returns:
        dict with 'input' (the layer output), 'mask_indices', 'units' and
        'shape'. With 'attention_pooling' enabled it also carries
        'weights_row' and 'weights_col', and 'weights_both' when an input
        matrix was given.

    Raises:
        AssertionError: if neither 'input' nor both 'nvec' and 'mvec' are
            present in `inputs`.
        KeyError: if 'pool_mode' contains neither 'max' nor equals 'mean'.
    """
    units = layer_params.get('units')
    if not scope:
        scope = "matrix_sparse"
    with tf.variable_scope(
            scope,
            default_name="matrix_sparse",
            initializer=layer_params.get('kernel_initializer', None),
            regularizer=layer_params.get('regularizer', None),
            reuse=reuse,
    ):
        # we should have the input matrix or at least one vector per dimension
        assert (('nvec' in inputs and 'mvec' in inputs) or 'input' in inputs)
        mat_values = inputs.get('input', None)  # N x M x K
        mask_indices = inputs.get('mask_indices', None)
        skip_connections = layer_params.get('skip_connections', False)
        shape = inputs['shape']
        N, M = shape
        K = inputs['units']
        output = tf.convert_to_tensor(0, np.float32)
        # If we have an input matrix. If not, we only have nvec and mvec,
        # i.e., user and movie properties.
        if mat_values is not None:
            # Pool the observed entries over columns (axis=0), rows (axis=1)
            # and both axes (axis=None).
            if 'max' in layer_params.get('pool_mode', 'max'):
                mat_marg_0 = sparse_reduce(mask_indices, mat_values, K, shape=shape, mode='max', axis=0, keep_dims=True)
                mat_marg_1 = sparse_reduce(mask_indices, mat_values, K, shape=shape, mode='max', axis=1, keep_dims=True)
                mat_marg_2 = sparse_reduce(mask_indices, mat_values, K, shape=shape, mode='max', axis=None, keep_dims=True)
            elif layer_params['pool_mode'] == 'mean':
                mat_marg_0 = weighted_mean_reduce(mask_indices, mat_values, K, shape=shape, logweights=inputs.get('weights_row', None), axis=0)  # 1 x M x K
                mat_marg_1 = weighted_mean_reduce(mask_indices, mat_values, K, shape=shape, logweights=inputs.get('weights_col', None), axis=1)  # N x 1 x K
                mat_marg_2 = weighted_mean_reduce(mask_indices, mat_values, K, shape=shape, logweights=inputs.get('weights_both', None), axis=None)  # 1 x 1 x K
            else:
                raise KeyError("Unrecognised pool mode: %s" % layer_params["pool_mode"])
            theta_0 = model_variable("theta_0", shape=[K, units], trainable=True, dtype=tf.float32)
            theta_1 = model_variable("theta_1", shape=[K, units], trainable=True, dtype=tf.float32)
            theta_2 = model_variable("theta_2", shape=[K, units], trainable=True, dtype=tf.float32)
            theta_3 = model_variable("theta_3", shape=[K, units], trainable=True, dtype=tf.float32)
            # Per-entry projection, then add each projected pooled summary
            # back onto the sparse values.
            output = sparse_tensordot_sparse(mat_values, theta_0, K)
            output_0 = tf.tensordot(mat_marg_0, theta_1, axes=[[2], [0]])  # 1 x M x units
            output = sparse_tensor_broadcast_dense_add(output, output_0, mask_indices, units, broadcast_axis=0)
            output_1 = tf.tensordot(mat_marg_1, theta_2, axes=[[2], [0]])  # N x 1 x units
            output = sparse_tensor_broadcast_dense_add(output, output_1, mask_indices, units, broadcast_axis=1)
            output_2 = tf.tensordot(mat_marg_2, theta_3, axes=[[2], [0]])  # 1 x 1 x units
            output = sparse_tensor_broadcast_dense_add(output, output_2, mask_indices, units, broadcast_axis=None)
        nvec = inputs.get('nvec', None)
        mvec = inputs.get('mvec', None)
        if nvec is not None:
            theta_4 = model_variable("theta_4", shape=[K, units], trainable=True)
            output_tmp = tf.tensordot(nvec, theta_4, axes=[[2], [0]])  # N x 1 x units
            output_tmp.set_shape([N, 1, units])  # because of current tensorflow bug!!
            if mat_values is not None:
                output = sparse_tensor_broadcast_dense_add(output, output_tmp, mask_indices, units, broadcast_axis=1)
            else:
                # output = output + output_tmp
                output = dense_vector_to_sparse_values(output_tmp, mask_indices) + output
        if mvec is not None:
            theta_5 = model_variable("theta_5", shape=[K, units], trainable=True)
            output_tmp = tf.tensordot(mvec, theta_5, axes=[[2], [0]])  # 1 x M x units
            output_tmp.set_shape([1, M, units])  # because of current tensorflow bug!!
            if mat_values is not None:
                output = sparse_tensor_broadcast_dense_add(output, output_tmp, mask_indices, units, broadcast_axis=0)
            else:
                # output = output + output_tmp
                output = dense_vector_to_sparse_values(output_tmp, mask_indices) + output
        if layer_params.get("individual_bias", False):
            # for testing my individual bias idea - I don't think it is helpful
            print("Using individual bias in scope %s" % tf.contrib.framework.get_name_scope(), kwargs["sizes"])
            row_bias = model_variable("row_bias", shape=[kwargs["sizes"][0]], trainable=True)
            column_bias = model_variable("column_bias", shape=[kwargs["sizes"][1]], trainable=True)
            #mask_indices = tf.cast(mask_indices, dtype=tf.float32)
            # NOTE(review): `r_bias` is gathered from `column_bias` and
            # `c_bias` from `row_bias`; the names look swapped - confirm.
            r_bias = tf.reshape(tf.gather(column_bias, inputs['col']), (1, -1, 1))
            c_bias = tf.reshape(tf.gather(row_bias, inputs['row']), (-1, 1, 1))
            # Biases are mean-centred before being added.
            output += r_bias - tf.reduce_mean(r_bias)
            output += c_bias - tf.reduce_mean(c_bias)
        if layer_params.get('activation', None) is not None:
            if verbose == 1:
                print("Applying activation: %s" % layer_params["activation"])
            output = layer_params.get('activation')(output)
        # The residual connection needs matching input and output widths.
        if skip_connections and mat_values is not None and K == units:
            output = output + mat_values
        # if mat_values is None:
        #     output = dense_tensor_to_sparse_values(output, mask_indices, units)
        outdic = {
            'input': output,
            'mask_indices': mask_indices,
            'units': units,
            'shape': shape
        }
        if layer_params.get("attention_pooling", False):
            # Extra projections emitted as 'weights_*'; this function reads
            # the same keys from `inputs` as `logweights` for mean pooling.
            gamma_0 = model_variable("gamma_0", shape=[K, units], trainable=True, dtype=tf.float32)
            gamma_1 = model_variable("gamma_1", shape=[K, units], trainable=True, dtype=tf.float32)
            gamma_2 = model_variable("gamma_2", shape=[K, units], trainable=True, dtype=tf.float32)
            c = 1.
            if mat_values is not None:
                outdic["weights_row"] = tf.tensordot(mat_marg_1 * c, gamma_2, axes=[[2], [0]])
                outdic["weights_col"] = tf.tensordot(mat_marg_0 * c, gamma_1, axes=[[2], [0]])
                outdic["weights_both"] = sparse_tensordot_sparse(mat_values * c, gamma_0, K)
            else:
                # NOTE(review): this path multiplies nvec / mvec directly, so
                # it presumably requires both to be present - confirm.
                outdic["weights_row"] = tf.tensordot(nvec * c, gamma_2, axes=[[2], [0]])
                outdic["weights_col"] = tf.tensordot(mvec * c, gamma_1, axes=[[2], [0]])
        return outdic
def matrix_dense(inputs, layer_params, reuse=None, scope=None, verbose=1, **kwargs):
    """Matrix layer on dense input built from a configurable sum of terms.

    Each enabled term (bias, theta_0 ... theta_5, bilinear) is added to the
    output multiplied by `sign`, which is negated after every added term.

    NOTE(review): block nesting (in particular where each `sign *= -1` and
    `output.set_shape` sits) was reconstructed from a whitespace-collapsed
    source; confirm against the upstream formatting.

    Args:
        inputs: dict. Keys read:
            'input': N x M x K input matrix (optional),
            'mask':  N x M x 1 the observed entries (optional),
            'nvec':  N x 1 x K' features for rows (optional),
            'mvec':  1 x M x K'' features for cols (optional),
            'indn', 'indm': row / column indices, used to gather per-row and
                per-column parameters when 'overparam' is set,
            'total_shape': returned unchanged; also sizes the over-
                parameterised variables.
            Either 'input' or both 'nvec' and 'mvec' must be present.
        layer_params: dict of layer options. Keys read: 'units',
            'kernel_initializer', 'regularizer', 'skip_connections',
            'overparam', 'bias', 'pool_mode', 'theta_0' ... 'theta_5',
            'bilinear', 'activation', 'drop_mask'.
        reuse: Forwarded to `tf.variable_scope`.
        scope: Variable scope name; falls back to "matrix_dense".
        verbose: Unused in this function.
        **kwargs: Unused in this function.

    Returns:
        dict with 'input' (the layer output), 'mask' (None when 'drop_mask'
        is truthy, which is the default), 'total_shape', 'indn' and 'indm'.

    Raises:
        AssertionError: if neither 'input' nor both 'nvec' and 'mvec' are
            present in `inputs`.
    """
    units = layer_params.get('units')
    if not scope:
        scope = "matrix_dense"
    with tf.variable_scope(
            scope,
            default_name="matrix_dense",
            initializer=layer_params.get('kernel_initializer', None),
            regularizer=layer_params.get('regularizer', None),
            reuse=reuse,
    ):
        # we should have the input matrix or at least one vector per dimension
        assert (('nvec' in inputs and 'mvec' in inputs) or 'input' in inputs)
        # Keeps the masked normalisers away from zero.
        eps = tf.convert_to_tensor(1e-3, dtype=np.float32)
        mat = inputs.get('input', None)  # N x M x K
        mask = inputs.get('mask', None)  # N x M
        skip_connections = layer_params.get('skip_connections', False)
        output = tf.convert_to_tensor(0, np.float32)
        config_string = "Using the following terms: "
        sign = 1
        overparam = layer_params.get('overparam', False)
        indn = inputs['indn']
        indm = inputs['indm']
        if layer_params.get('bias', True):
            bias = model_variable("bias", shape=[units], trainable=True)
            output += sign * bias
            sign *= -1
        # If we have an input matrix. If not, we only have nvec and mvec,
        # i.e., user and movie properties.
        if mat is not None:
            N, M, K = mat.get_shape().as_list()
            # Normalisers for an unmasked mean; replaced by observed-entry
            # counts below when a mask is given.
            norm_N = np.float32(N)
            norm_M = np.float32(M)
            norm_NM = np.float32(N * M)
            if mask is not None:
                mat = mat * mask
                norm_N = tf.reduce_sum(mask, axis=0, keep_dims=True) + eps  # 1, M, 1
                norm_M = tf.reduce_sum(mask, axis=1, keep_dims=True) + eps  # N, 1, 1
                norm_NM = tf.reduce_sum(mask, axis=[0, 1], keep_dims=True) + eps  # 1, 1, 1
            # Max pooling is only used without a mask; with a mask the layer
            # falls back to the normalised sum.
            if 'max' in layer_params.get('pool_mode', 'max') and mask is None:
                mat_marg_0 = tf.reduce_max(mat, axis=0, keep_dims=True)
                mat_marg_1 = tf.reduce_max(mat, axis=1, keep_dims=True)
                mat_marg_2 = tf.reduce_max(mat_marg_0, axis=1, keep_dims=True)
            else:
                mat_marg_0 = tf.reduce_sum(mat, axis=0, keep_dims=True) / norm_N  # 1 x M x K
                mat_marg_1 = tf.reduce_sum(mat, axis=1, keep_dims=True) / norm_M  # N x 1 x K
                # NOTE(review): mat_marg_0 is already divided by norm_N, so
                # this value is normalised by both norm_N and norm_NM -
                # confirm the intended scaling.
                mat_marg_2 = tf.reduce_sum(mat_marg_0, axis=1, keep_dims=True) / norm_NM  # 1 x 1 x K
            if layer_params.get('theta_0', True):
                config_string += "theta 0, "
                theta_0 = model_variable("theta_0", shape=[K, units], trainable=True)
                output += sign * tf.tensordot(mat, theta_0, axes=tf.convert_to_tensor([[2], [0]], dtype=np.int32))  # N x M x units
                output.set_shape([N, M, units])  # because of current tensorflow bug!!
                sign *= -1
            if layer_params.get('theta_1', True):
                config_string += "theta 1, "
                if overparam:
                    # One [K, units] matrix per column of the full matrix;
                    # gather the ones for the columns in this batch.
                    MM = inputs['total_shape'][1]
                    theta_1 = tf.get_variable('theta_1', shape=[MM, K, units], trainable=True)
                    theta_1 = tf.gather(theta_1, indm, axis=0)
                    theta_1.set_shape([M, K, units])
                    output += sign * tf.einsum('ijk,jkl->ijl', mat_marg_0, theta_1)  # 1 x M x units
                else:
                    theta_1 = model_variable("theta_1", shape=[K, units], trainable=True)
                    output += sign * tf.tensordot(mat_marg_0, theta_1, axes=tf.convert_to_tensor([[2], [0]], dtype=np.int32))  # 1 x M x units
                    output.set_shape([N, M, units])  # because of current tensorflow bug!!
                sign *= -1
            if layer_params.get('theta_2', True):
                config_string += "theta 2, "
                if overparam:
                    # One [K, units] matrix per row of the full matrix;
                    # gather the ones for the rows in this batch.
                    NN = inputs['total_shape'][0]
                    theta_2 = tf.get_variable('theta_2', shape=[NN, K, units], trainable=True)
                    theta_2 = tf.gather(theta_2, indn, axis=0)
                    theta_2.set_shape([N, K, units])
                    output += sign * tf.einsum('ijk,ikl->ijl', mat_marg_1, theta_2)
                else:
                    theta_2 = model_variable("theta_2", shape=[K, units], trainable=True)
                    output += sign * tf.tensordot(mat_marg_1, theta_2, axes=tf.convert_to_tensor([[2], [0]], dtype=np.int32))  # N x 1 x units
                    output.set_shape([N, M, units])  # because of current tensorflow bug!!
                sign *= -1
            if layer_params.get('theta_3', True):
                config_string += "theta 3, "
                theta_3 = model_variable("theta_3", shape=[K, units], trainable=True)
                output += sign * tf.tensordot(mat_marg_2, theta_3, axes=tf.convert_to_tensor([[2], [0]], dtype=np.int32))  # 1 x 1 x units
                output.set_shape([N, M, units])  # because of current tensorflow bug!!
                sign *= -1
        nvec = inputs.get('nvec', None)
        mvec = inputs.get('mvec', None)
        if layer_params.get('bilinear', False):
            if nvec is not None:
                config_string += "bilinear, "
                # NOTE: K is rebound here (and in the theta_4 / theta_5
                # branches) to the feature size of nvec / mvec.
                _, _, K = nvec.get_shape().as_list()
                theta_6 = model_variable("theta_6", shape=[K, K, units], trainable=True)
                # Contract nvec with theta_6, then with mvec.
                # NOTE(review): mvec is used without a None check - confirm it
                # is always supplied together with nvec.
                output_n = tf.reduce_sum(nvec[:, :, :, None, None] * theta_6[None, None, :, :, :], axis=2)
                output_m = tf.reduce_sum(output_n * mvec[:, :, :, None], axis=2)
                output += sign * output_m
                sign *= -1
        if layer_params.get('theta_4', True):
            if nvec is not None:
                config_string += "theta 4, "
                N, _, K = nvec.get_shape().as_list()
                theta_4 = model_variable("theta_4", shape=[K, units], trainable=True)
                output_tmp = tf.tensordot(nvec, theta_4, axes=tf.convert_to_tensor([[2], [0]], dtype=np.int32))  # N x 1 x units
                output_tmp.set_shape([N, 1, units])  # because of current tensorflow bug!!
                output += sign * output_tmp
                sign *= -1
        if layer_params.get('theta_5', True):
            if mvec is not None:
                config_string += "theta 5, "
                _, M, K = mvec.get_shape().as_list()
                theta_5 = model_variable("theta_5", shape=[K, units], trainable=True)
                output_tmp = tf.tensordot(mvec, theta_5, axes=tf.convert_to_tensor([[2], [0]], dtype=np.int32))  # 1 x M x units
                output_tmp.set_shape([1, M, units])  # because of current tensorflow bug!!
                output += sign * output_tmp
                sign *= -1
        if layer_params.get('activation', None) is not None:
            output = layer_params.get('activation')(output)
        # By default the mask is not passed on to the next layer.
        if layer_params.get('drop_mask', True):
            mask = None
        # `K` here is whatever it was last bound to above (input matrix or
        # nvec / mvec feature size).
        if skip_connections and mat is not None and K == units:
            config_string += "with skip connections"
            output = output + mat
        print(config_string)
        outdic = {
            'input': output,
            'mask': mask,
            'total_shape': inputs['total_shape'],
            'indn': indn,
            'indm': indm
        }
        return outdic
def main(opts):
    """Build and train the sparse factorized autoencoder described by `opts`.

    The encoder produces row / column embeddings ('nvec' / 'mvec'); their
    masked inner product is fed to the decoder, and the model is trained with
    Adam on reconstruction loss plus regularization losses.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source (the "encoder" / "decoder" scopes are taken to be siblings of the
    "input_features" scope); confirm against the upstream formatting.

    Args:
        opts: dict of run options. Keys read: 'data_path', 'maxN', 'maxM',
            'verbose', 'defaults', 'lr', 'encoder', 'decoder', 'sample_mode',
            'epochs', and 'minibatch_size' (only for the
            'uniform_over_dense_values' sample mode).

    Returns:
        An `OrderedDict` with per-epoch lists under 'train' and 'valid'.
        Returns None when `opts['sample_mode']` matches no known mode (an
        error message is printed on the first epoch).
    """
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    path = opts['data_path']
    data = get_data(path, train=.8, valid=.1, test=.1)

    # build encoder and decoder and use VAE loss
    N, M, _ = data['mat_shape']
    maxN, maxM = opts['maxN'], opts['maxM']
    # A sub-matrix can never be larger than the matrix itself.
    if N < maxN:
        maxN = N
    if M < maxM:
        maxM = M

    if opts['verbose'] > 0:
        print('\nFactorized Autoencoder run settings:')
        print('dataset: ', path)
        print('Exchangable layer pool mode: ',
              opts['defaults']['matrix_sparse']['pool_mode'])
        print('Pooling layer pool mode: ',
              opts['defaults']['matrix_pool_sparse']['pool_mode'])
        print('learning rate: ', opts['lr'])
        print('activation: ', opts['defaults']['matrix_sparse']['activation'])
        print('number of latent features: ', opts['encoder'][-2]['units'])
        print('maxN: ', opts['maxN'])
        print('maxM: ', opts['maxM'])
        print('')

    with tf.Graph().as_default():
        # with tf.device('/gpu:0'):
        mat_values_tr = tf.placeholder(tf.float32, shape=[None],
                                       name='mat_values_tr')
        mask_indices_tr = tf.placeholder(tf.int32, shape=[None, 2],
                                         name='mask_indices_tr')
        mat_values_val = tf.placeholder(tf.float32, shape=[None],
                                        name='mat_values_val')
        mask_indices_val = tf.placeholder(tf.int32, shape=[None, 2],
                                          name='mask_indices_val')
        mask_indices_tr_val = tf.placeholder(tf.int32, shape=[None, 2],
                                             name='mask_indices_tr_val')

        # Learned per-column and per-row input features.
        with tf.variable_scope(
                None,
                default_name="input_features",
                initializer=opts['defaults']['matrix_sparse'].get(
                    'kernel_initializer', None),
                regularizer=opts['defaults']['matrix_sparse'].get(
                    'regularizer', None),
                reuse=False,
        ):
            mvec_feat = model_variable("mvec_feat", shape=[1, M, 1],
                                       trainable=True)
            nvec_feat = model_variable("nvec_feat", shape=[N, 1, 1],
                                       trainable=True)

        with tf.variable_scope("encoder"):
            tr_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'mvec': mvec_feat,
                'shape': [N, M],
                'nvec': nvec_feat
            }
            # The validation pass encodes the same (training) entries; only
            # the decoder is evaluated at different indices.
            val_dict = {
                'input': mat_values_tr,
                'mask_indices': mask_indices_tr,
                'units': 1,
                'mvec': mvec_feat,
                'nvec': nvec_feat,
                'shape': [N, M]
            }
            encoder = Model(layers=opts['encoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  # define the encoder
            out_enc_tr = encoder.get_output(tr_dict)  # build the encoder
            # get encoder output, reusing the neural net
            out_enc_val = encoder.get_output(val_dict, reuse=True, verbose=0,
                                             is_training=False)

        with tf.variable_scope("decoder"):
            tr_dict = {
                'input': masked_inner_product(out_enc_tr['nvec'],
                                              out_enc_tr['mvec'],
                                              mask_indices_tr),
                'mask_indices': mask_indices_tr,
                'units': 1,
                'shape': out_enc_tr['shape']
            }
            val_dict = {
                'input': masked_inner_product(out_enc_val['nvec'],
                                              out_enc_val['mvec'],
                                              mask_indices_tr_val),
                'mask_indices': mask_indices_tr_val,
                'units': 1,
                'shape': out_enc_val['shape']
            }
            decoder = Model(layers=opts['decoder'],
                            layer_defaults=opts['defaults'],
                            verbose=2)  # define the decoder
            out_dec_tr = decoder.get_output(tr_dict)  # build it
            out_tr = out_dec_tr['input']
            # reuse it for validation
            out_dec_val = decoder.get_output(val_dict, reuse=True, verbose=0,
                                             is_training=False)
            out_val = out_dec_val['input']

        # loss and training
        rec_loss = rec_loss_fn_sp(mat_values_tr, mask_indices_tr, out_tr,
                                  tf.ones(tf.shape(mat_values_tr)))
        reg_loss = sum(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES))  # regularization
        rec_loss_val = rec_loss_fn_sp(mat_values_val, mask_indices_val,
                                      out_val, data['mask_tr_val_split'])
        total_loss = rec_loss + reg_loss
        train_step = tf.train.AdamOptimizer(opts['lr']).minimize(total_loss)
        # train_step = tf.train.GradientDescentOptimizer(opts['lr']).minimize(total_loss)

        # The context manager closes the session on every exit path,
        # including the early return for an unknown sample mode.
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            if 'by_row_column_density' in opts['sample_mode']:
                # a bad heuristic: the whole matrix is in expectation covered
                # in each epoch
                # NOTE(review): `//` floors before `math.ceil`, so the ceil is
                # a no-op; left unchanged because the number of batches that
                # `sample_submatrix` yields is not visible here - confirm.
                iters_per_epoch = math.ceil(N // maxN) * math.ceil(M // maxM)
            elif 'uniform_over_dense_values' in opts['sample_mode']:
                minibatch_size = np.minimum(opts['minibatch_size'],
                                            data['mask_indices_tr'].shape[0])
                iters_per_epoch = (data['mask_indices_tr'].shape[0] //
                                   minibatch_size)

            def run_train_batch(values, indices):
                """Run one optimizer step; return sqrt(total), sqrt(rec) loss."""
                _, batch_loss, batch_rec_loss = sess.run(
                    [train_step, total_loss, rec_loss],
                    feed_dict={mat_values_tr: values,
                               mask_indices_tr: indices})
                return np.sqrt(batch_loss), np.sqrt(batch_rec_loss)

            # Start from +inf so the first epoch always sets the minimum,
            # whatever the scale of the loss.
            min_loss = float('inf')
            min_loss_epoch = 0
            losses = OrderedDict()
            losses["train"] = []
            losses["valid"] = []

            for ep in range(opts['epochs']):
                begin = time.time()
                # loss_ts_ is never updated: no test loss is computed here.
                loss_tr_, rec_loss_tr_, loss_val_, loss_ts_ = 0, 0, 0, 0

                if 'by_row_column_density' in opts['sample_mode']:
                    # go over mini-batches
                    for indn_, indm_ in tqdm(
                            sample_submatrix(data['mask_tr'], maxN, maxM,
                                             sample_uniform=False),
                            total=iters_per_epoch):
                        # select a sub-matrix given random indices for
                        # users/movies
                        inds_ = np.ix_(indn_, indm_, [0])
                        mat_sp = (data['mat_tr_val'][inds_] *
                                  data['mask_tr'][inds_])
                        mat_values = dense_array_to_sparse(mat_sp)['values']
                        mask_indices = dense_array_to_sparse(
                            data['mask_tr'][inds_])['indices'][:, 0:2]
                        bloss_, brec_loss_ = run_train_batch(mat_values,
                                                             mask_indices)
                        loss_tr_ += bloss_
                        rec_loss_tr_ += brec_loss_
                elif 'uniform_over_dense_values' in opts['sample_mode']:
                    for sample_ in tqdm(
                            sample_dense_values_uniform(
                                data['mask_indices_tr'], minibatch_size,
                                iters_per_epoch),
                            total=iters_per_epoch):
                        mat_values = data['mat_values_tr'][sample_]
                        mask_indices = data['mask_indices_tr'][sample_]
                        bloss_, brec_loss_ = run_train_batch(mat_values,
                                                             mask_indices)
                        loss_tr_ += bloss_
                        rec_loss_tr_ += brec_loss_
                else:
                    print('\nERROR - unknown <sample_mode> in main()\n')
                    return

                loss_tr_ /= iters_per_epoch
                rec_loss_tr_ /= iters_per_epoch

                ## Validation Loss
                val_dict = {
                    mat_values_tr: data['mat_values_tr'],
                    mask_indices_tr: data['mask_indices_tr'],
                    mat_values_val: data['mat_values_tr_val'],
                    mask_indices_val: data['mask_indices_val'],
                    mask_indices_tr_val: data['mask_indices_tr_val']
                }
                bloss_, = sess.run([rec_loss_val], feed_dict=val_dict)
                loss_val_ += np.sqrt(bloss_)

                # keep track of the best validation loss
                if loss_val_ < min_loss:
                    min_loss = loss_val_
                    min_loss_epoch = ep

                losses['train'].append(loss_tr_)
                losses['valid'].append(loss_val_)
                print(
                    "epoch {:d} took {:.1f} training loss {:.3f} (rec:{:.3f}) \t validation: {:.3f} \t minimum validation loss: {:.3f} at epoch: {:d} \t test loss: {:.3f}"
                    .format(ep, time.time() - begin, loss_tr_, rec_loss_tr_,
                            loss_val_, min_loss, min_loss_epoch, loss_ts_))

    return losses
def normalize_to_target(inputs,
                        target_norm_value,
                        dim,
                        epsilon=1e-7,
                        trainable=True,
                        scope='NormalizeToTarget',
                        summarize=True):
    """L2 normalizes the inputs across the specified dimension to a target norm.

    This op implements the L2 Normalization layer introduced in
    Liu, Wei, et al. "SSD: Single Shot MultiBox Detector."
    and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg.
    "Parsenet: Looking wider to see better." and is useful for bringing
    activations from multiple layers in a convnet to a standard scale.

    Note that the rank of `inputs` must be known and the dimension to which
    normalization is to be applied should be statically defined.

    TODO(jonathanhuang): Add option to scale by L2 norm of the entire input.

    Args:
      inputs: A `Tensor` of arbitrary size.
      target_norm_value: A float value that specifies an initial target norm
        or a list of floats (whose length must be equal to the depth along
        the dimension to be normalized) specifying a per-dimension multiplier
        after normalization.
      dim: The dimension along which the input is normalized.
      epsilon: A small value added to the computed L2 norm to avoid dividing
        by zero.
      trainable: Whether the norm is trainable or not.
      scope: Optional scope for variable_scope.
      summarize: Whether or not to add a tensorflow summary for the op.

    Returns:
      The input tensor normalized to the specified target norm.

    Raises:
      ValueError: If the rank of `inputs` is unknown.
      ValueError: If dim is negative, or not smaller than the number of
        dimensions in 'inputs'.
      ValueError: If the size of `inputs` along `dim` is not statically
        defined.
      ValueError: If target_norm_value is not a float or a list of floats
        with length equal to the depth along the dimension to be normalized.
    """
    with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]):
        static_shape = inputs.get_shape()
        if not static_shape:
            raise ValueError('The input rank must be known.')

        dims = static_shape.as_list()
        rank = len(dims)
        if not 0 <= dim < rank:
            raise ValueError(
                'dim must be non-negative but smaller than the input rank.')

        depth = dims[dim]
        if not depth:
            raise ValueError('input shape should be statically defined along '
                             'the specified dimension.')

        scalar_target = isinstance(target_norm_value, float)
        if not scalar_target:
            # Anything that is not a single float must be a list of exactly
            # `depth` floats.
            valid_list = (isinstance(target_norm_value, list)
                          and len(target_norm_value) == depth
                          and all(isinstance(val, float)
                                  for val in target_norm_value))
            if not valid_list:
                raise ValueError(
                    'target_norm_value must be a float or a list of floats '
                    'with length equal to the depth along the dimension to '
                    'be normalized.')

        # A scalar target is broadcast to one entry per channel.
        if scalar_target:
            initial_norm = depth * [target_norm_value]
        else:
            initial_norm = target_norm_value

        target_norm = contrib_framework.model_variable(
            name='weights',
            dtype=tf.float32,
            initializer=tf.constant(initial_norm, dtype=tf.float32),
            trainable=trainable)

        if summarize:
            mean = tf.reduce_mean(target_norm)
            tf.summary.scalar(tf.get_variable_scope().name, mean)

        squared_sum = tf.reduce_sum(tf.square(inputs), dim, True)
        lengths = epsilon + tf.sqrt(squared_sum)

        # Shape the per-channel norms so they broadcast along `dim` only.
        mult_shape = input_rank_ones = rank * [1]
        input_rank_ones[dim] = depth
        normalized = tf.truediv(inputs, lengths)
        return tf.reshape(target_norm, mult_shape) * normalized