def _default_study_hyperparams(num_conv_layers, num_fc_layers):
  """Create the default hyperparameter values given a number of layers.

  Args:
    num_conv_layers: non-negative integer number of convolutional layers
    num_fc_layers: non-negative integer number of hidden layers

  Returns:
    A tf.HParams instance holding default hyperparameters.
  """
  hypers = {
      'nonlinearity': 'tanh',
      'learn_rate': 0.005,
      'momentum': 0.9,
      'output_init_factor': 1.0,
      'dropouts': [0.0] * (num_fc_layers + 1)
  }
  if num_fc_layers:
    hypers.update({
        'fc_hid_sizes': [256] * num_fc_layers,
        'fc_init_factors': [1.0] * num_fc_layers
    })
  if num_conv_layers:
    # HParams cannot handle empty lists
    hypers.update({
        'conv_widths': [16] * num_conv_layers,
        'conv_depths': [32] * num_conv_layers,
        'conv_strides': [1] * num_conv_layers,
        'conv_rates': [1] * num_conv_layers,
        'conv_init_factors': [1.0] * num_conv_layers
    })
  return tf.HParams(**hypers)
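

# A hypothetical usage sketch of the defaults above. The layer counts are
# arbitrary and this helper is not called anywhere in this module; it only
# illustrates what _default_study_hyperparams returns.
def _example_default_hyperparams():
  """Sketch only: defaults for a model with 2 conv and 3 fully connected layers."""
  hps = _default_study_hyperparams(num_conv_layers=2, num_fc_layers=3)
  # With these arguments the defaults include, for example:
  #   hps.conv_depths  == [32, 32]
  #   hps.fc_hid_sizes == [256, 256, 256]
  #   hps.dropouts     == [0.0, 0.0, 0.0, 0.0]
  return hps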
def create_output_layer(experiment_proto, hps=None):
  """Create an output layer of the provided type.

  Args:
    experiment_proto: selection_pb2.Experiment describing the experiment. Must
      have pre-computed statistics necessary for normalizing counts.
    hps: optional tf.HParams with at least these fields:
      - output_layer: string indicating an OutputLayer class.
      - loss_name: string indicating a Loss class.
      - dependency_norm: string indicating the name of a normalization method
        to use for count dependencies in the OutputLayer.
      - loss_norm: string indicating the name of a normalization method to use
        in the loss.
      - standardize_log_transform: boolean indicating whether or not we want to
        log transform counts before standardizing them. Only relevant if you
        use standardization for normalization.
      - binarize_threshold: integer giving the count threshold at which to
        binarize. Only relevant for losses that binarize.
      - target_names: list of strings giving target names to train against.
      - additional_output: comma-separated string of additional output names,
        or the empty string for none.
      - affinity_target_map: Only required for FULLY_OBSERVED models when the
        output layer will be used to calculate affinity, this dictionary maps
        each selection affinity molecule (e.g. protein) to a set of target
        outputs (i.e. sequencing count pools) to be used when calculating
        affinity.

  Returns:
    AbstractOutputLayer instance of the type indicated by `hps.output_layer`.

  Raises:
    ValueError: If any of `output_layer`, `loss_name`, `dependency_norm` or
      `loss_norm` are not recognized.
  """
  if hps is None:
    hps = tf.HParams(
        output_layer=OUTPUT_FULLY_OBSERVED,
        loss_name=LOSS_SQUARED_ERROR,
        dependency_norm=NORM_STANDARDIZE,
        loss_norm=NORM_STANDARDIZE,
        standardize_log_transform=True,
        binarize_threshold=1,
        target_names=[TARGETS_ALL_OUTPUTS],
        additional_output='')

  normalize = normalizer(hps.loss_norm, experiment_proto,
                         hps.standardize_log_transform, hps.binarize_threshold)
  deps_normalize = normalizer(hps.dependency_norm, experiment_proto,
                              hps.standardize_log_transform,
                              hps.binarize_threshold)

  if hps.loss_name == LOSS_SQUARED_ERROR:
    loss = SquaredError(normalize)
  elif hps.loss_name == LOSS_CROSS_ENTROPY:
    loss = CrossEntropy(normalize)
  elif hps.loss_name == LOSS_POISSON_LOSS:
    if hps.loss_norm != NORM_SKIP:
      raise ValueError('invalid normalization for poisson loss')
    loss = PoissonLoss(normalize)
  elif hps.loss_name == LOSS_ZERO_TRUNCATED_POISSON_LOSS:
    if hps.loss_norm != NORM_SKIP:
      raise ValueError('invalid normalization for zero-truncated poisson loss')
    loss = ZeroTruncatedPoissonLoss(normalize)
  else:
    raise ValueError('unrecognized loss: %r' % hps.loss_name)

  if hps.additional_output:
    additional_output = hps.additional_output.split(',')
  else:
    additional_output = []

  if hps.output_layer == OUTPUT_FULLY_OBSERVED:
    output_layer = FullyObserved(experiment_proto, loss,
                                 hps.affinity_target_map, hps.target_names,
                                 additional_output)
  elif hps.output_layer == OUTPUT_LATENT_AFFINITY:
    output_layer = LatentAffinity(experiment_proto, loss, hps.target_names,
                                  additional_output)
  elif hps.output_layer == OUTPUT_LATENT_WITH_DEPS:
    output_layer = LatentAffinityWithDeps(experiment_proto, loss,
                                          deps_normalize, hps.target_names,
                                          additional_output)
  elif hps.output_layer == OUTPUT_LATENT_WITH_PRED_DEPS:
    output_layer = LatentAffinityWithPredictedDeps(experiment_proto, loss,
                                                   deps_normalize,
                                                   hps.target_names,
                                                   additional_output)
  elif hps.output_layer == OUTPUT_LATENT_WITH_CROSS_DEPS:
    output_layer = LatentAffinityWithCrossDeps(experiment_proto, loss,
                                               deps_normalize,
                                               hps.target_names,
                                               additional_output)
  else:
    raise ValueError('unrecognized output_layer: %r' % hps.output_layer)

  return output_layer
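

# A hypothetical usage sketch of create_output_layer (not called anywhere in
# this module). The constant names and HParams fields mirror those consumed by
# the function above; the experiment proto is assumed to be loaded elsewhere.
def _example_create_poisson_output_layer(experiment_proto):
  """Sketch only: a latent-affinity output layer trained with a Poisson loss."""
  hps = tf.HParams(
      output_layer=OUTPUT_LATENT_AFFINITY,
      loss_name=LOSS_POISSON_LOSS,
      dependency_norm=NORM_SKIP,
      loss_norm=NORM_SKIP,  # Poisson losses require skipping normalization.
      standardize_log_transform=False,
      binarize_threshold=1,
      target_names=[TARGETS_ALL_OUTPUTS],
      additional_output='')
  return create_output_layer(experiment_proto, hps)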
def get_pruning_hparams():
  """Get a tf.HParams object with the default values for the hyperparameters.

    name: string
      name of the pruning specification. Used for adding summaries and ops
      under a common tensorflow name_scope
    begin_pruning_step: integer
      the global step at which to begin pruning
    end_pruning_step: integer
      the global step at which to terminate pruning. Defaults to -1 implying
      that pruning continues till the training stops
    weight_sparsity_map: list of strings
      comma separated list of weight variable name:target sparsity pairs.
      For layers/weights not in this list, sparsity as specified by the
      target_sparsity hyperparameter is used.
      Eg. [conv1:0.9,conv2/kernel:0.8]
    threshold_decay: float
      the decay factor to use for exponential decay of the thresholds
    pruning_frequency: integer
      How often should the masks be updated? (in # of global_steps)
    nbins: integer
      number of bins to use for histogram computation
    block_height: integer
      number of rows in a block (defaults to 1)
    block_width: integer
      number of cols in a block (defaults to 1)
    block_pooling_function: string
      Whether to perform average (AVG) or max (MAX) pooling in the block
      (default: AVG)
    initial_sparsity: float
      initial sparsity value
    target_sparsity: float
      target sparsity value
    sparsity_function_begin_step: integer
      the global step at which the gradual sparsity function begins to take
      effect
    sparsity_function_end_step: integer
      the global step used as the end point for the gradual sparsity function
    sparsity_function_exponent: float
      exponent = 1 is linearly varying sparsity between initial and final.
      exponent > 1 varies more slowly towards the end than the beginning
    use_tpu: boolean
      indicates whether to use TPU
    gradient_decay_rate: float
      when prune_option is gradient based pruning, decay factor for gradient
      decay
    prune_option: string
      option = 'weight' means using |weight| for pruning.
      option = 'first_order_gradient' means using
      |weight| * |first order gradient| for pruning.
      option = 'second_order_gradient' means using
      |weight| * |second order gradient| for pruning; the second order gradient
      is approximated by |weight + old_old_weight - 2*old_weight|.
      option = 3 means using effective resistance for pruning.
      option > 3 reserved for future use

    We use the following sparsity function:

    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step) / pruning_frequency
    sparsity(step) = (initial_sparsity - target_sparsity) *
                     [1 - step / (num_steps - 1)]**exponent + target_sparsity

  Args:
    None

  Returns:
    tf.HParams object initialized to default values
  """
  return tf.HParams(
      name='model_pruning',
      begin_pruning_step=0,
      end_pruning_step=-1,
      weight_sparsity_map=[''],
      threshold_decay=0.0,
      pruning_frequency=10,
      nbins=256,
      block_height=1,
      block_width=1,
      block_pooling_function='AVG',
      initial_sparsity=0.0,
      target_sparsity=0.5,
      sparsity_function_begin_step=0,
      sparsity_function_end_step=100,
      sparsity_function_exponent=3.0,
      use_tpu=False,
      gradient_decay_rate=0.99,
      prune_option='weight')
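

# A worked example of the gradual sparsity function described in the docstring
# above. This helper is illustrative only (it is not the pruning library's own
# schedule implementation and is not called anywhere in this module).
def _example_gradual_sparsity(step, hparams):
  """Sketch only: evaluate the docstring's sparsity schedule at a mask-update step."""
  num_steps = (hparams.sparsity_function_end_step -
               hparams.sparsity_function_begin_step) / hparams.pruning_frequency
  fraction = 1.0 - float(step) / (num_steps - 1)
  return ((hparams.initial_sparsity - hparams.target_sparsity) *
          fraction**hparams.sparsity_function_exponent +
          hparams.target_sparsity)
  # With the defaults returned by get_pruning_hparams() (begin=0, end=100,
  # frequency=10, initial=0.0, target=0.5, exponent=3.0):
  #   _example_gradual_sparsity(0, hparams) -> 0.0
  #   _example_gradual_sparsity(9, hparams) -> 0.5  (target reached at the end)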
def compute_experiment_statistics(
    experiment_proto,
    input_paths,
    proto_w_stats_path,
    preprocess_mode=data.PREPROCESS_SKIP_ALL_ZERO_COUNTS,
    max_size=None,
    logdir=None,
    save_stats=False):
  """Calculate the mean and standard deviation of counts from input files.

  These statistics are used for normalization. If any statistic is missing or
  save_stats=True, compute the statistics. Save the statistics to
  proto_w_stats_path if save_stats=True.

  Args:
    experiment_proto: selection_pb2.Experiment describing the experiment.
    input_paths: list of strings giving paths to sstables of input examples.
    proto_w_stats_path: string path to the validation proto file with stats.
    preprocess_mode: optional preprocess mode defined in the `data` module.
    max_size: optional number of examples to examine to compute statistics. By
      default, examines the entire dataset.
    logdir: optional path to a directory in which to log events.
    save_stats: optional boolean indicating whether to update all the
      statistics and save to proto_w_stats_path.

  Returns:
    selection_pb2.Experiment with computed statistics.
  """
  experiment_proto = copy.deepcopy(experiment_proto)

  has_all_statistics = True

  all_reads = {}
  for round_proto in experiment_proto.rounds.values():
    for reads in [round_proto.positive_reads, round_proto.negative_reads]:
      if reads.name:
        all_reads[reads.name] = reads
        if not reads.HasField('statistics'):
          has_all_statistics = False

  all_ao = {}
  for ao_proto in experiment_proto.additional_output:
    if ao_proto.name:
      all_ao[ao_proto.name] = ao_proto
      if not ao_proto.HasField('statistics'):
        has_all_statistics = False

  if not has_all_statistics or save_stats:
    with tf.Graph().as_default():
      logger.info('Setting up graph for statistics')
      # we only care about outputs, which don't rely on training hyper
      # parameters
      hps = tf.HParams(
          preprocess_mode=preprocess_mode,
          kmer_k_max=0,
          ratio_random_dna=0.0,
          total_reads_defining_positive=0,
          additional_output=','.join(
              [x.name for x in experiment_proto.additional_output]))
      _, outputs = data.input_pipeline(
          input_paths,
          experiment_proto,
          final_mbsz=100000,
          hps=hps,
          num_epochs=1,
          num_threads=1)
      size_op = tf.shape(outputs)[list(outputs.axes.keys()).index('batch')]

      all_update_ops = []
      all_value_ops = {}

      for name in all_reads:
        counts = lt.select(outputs, {'output': name})
        log_counts = lt.log(counts + 1.0)
        ops = {
            'mean': contrib_metrics.streaming_mean(counts),
            'std_dev': streaming_std(counts),
            'mean_log_plus_one': contrib_metrics.streaming_mean(log_counts),
            'std_dev_log_plus_one': streaming_std(log_counts),
        }
        value_ops, update_ops = contrib_metrics.aggregate_metric_map(ops)
        all_update_ops.extend(list(update_ops.values()))
        all_value_ops[name] = value_ops

      for name in all_ao:
        ao = lt.select(outputs, {'output': name})
        log_ao = lt.log(ao + 1.0)
        ops = {
            'mean': contrib_metrics.streaming_mean(ao),
            'std_dev': streaming_std(ao),
            'mean_log_plus_one': contrib_metrics.streaming_mean(log_ao),
            'std_dev_log_plus_one': streaming_std(log_ao),
        }
        value_ops, update_ops = contrib_metrics.aggregate_metric_map(ops)
        all_update_ops.extend(list(update_ops.values()))
        all_value_ops[name] = value_ops

      logger.info('Running statistics ops')
      sv = tf.train.Supervisor(logdir=logdir)
      with sv.managed_session() as sess:
        total = 0
        for results in run_until_exhausted(sv, sess,
                                           [size_op] + all_update_ops):
          total += results[0]
          if max_size is not None and total >= max_size:
            break
        all_statistics = {k: sess.run(v) for k, v in all_value_ops.items()}

      for reads_name, reads in all_reads.items():
        for name, value in all_statistics[reads_name].items():
          setattr(reads.statistics, name, value.item())

      for ao_name, ao in all_ao.items():
        for name, value in all_statistics[ao_name].items():
          setattr(ao.statistics, name, value.item())

      logger.info('Computed statistics: %r', all_statistics)

      if save_stats:
        logger.info('Save the proto with statistics to %s', proto_w_stats_path)
        with open('/tmp/tmp.pbtxt', 'w') as f:
          f.write(text_format.MessageToString(experiment_proto))
        gfile.Copy('/tmp/tmp.pbtxt', proto_w_stats_path, overwrite=True)
  else:
    logger.info('All the statistics exist. Nothing to compute')

  return experiment_proto
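

# A hypothetical end-to-end sketch for compute_experiment_statistics (not
# called anywhere in this module). The paths are placeholders, and it assumes
# selection_pb2 is importable as referenced in the docstrings above; the
# gfile/text_format usage mirrors the function above.
def _example_compute_statistics(proto_path, sstable_paths, output_path):
  """Sketch only: load an experiment proto, then compute and save count statistics."""
  with gfile.GFile(proto_path) as f:
    experiment_proto = text_format.Parse(f.read(), selection_pb2.Experiment())
  return compute_experiment_statistics(
      experiment_proto,
      input_paths=sstable_paths,
      proto_w_stats_path=output_path,
      max_size=1000000,  # arbitrary cap, for illustration only
      save_stats=True)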