def testSetHParamListNonListMismatch(self):
  hparams = hparam.HParams(a=1, b=[2.0, 3.0])
  with self.assertRaisesRegexp(ValueError, r'Must not pass a list'):
    hparams.set_hparam('a', [1.0])
  with self.assertRaisesRegexp(ValueError, r'Must pass a list'):
    hparams.set_hparam('b', 1.0)
def testBoolParsingFail(self):
  hparams = hparam.HParams(use_gpu=True)
  with self.assertRaisesRegexp(ValueError, r'Could not parse.*use_gpu'):
    hparams.parse('use_gpu=yep')
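# Illustrative companion to the failure case above (a sketch, not from the
# original suite): HParams bool parsing accepts the spellings 'true'/'True'
# and 'false'/'False', so the happy path looks like this.
def testBoolParsingOk(self):
  hparams = hparam.HParams(use_gpu=True)
  hparams.parse('use_gpu=false')
  self.assertEqual(False, hparams.use_gpu)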
def testContains(self):
  hparams = hparam.HParams(foo=1)
  self.assertTrue('foo' in hparams)
  self.assertFalse('bar' in hparams)
def get_pruning_hparams():
  """Get a tf.HParams object with the default values for the hyperparameters.

    name: string
      name of the pruning specification. Used for adding summaries and ops
      under a common tensorflow name_scope
    begin_pruning_step: integer
      the global step at which to begin pruning
    end_pruning_step: integer
      the global step at which to terminate pruning. Defaults to -1 implying
      that pruning continues until training stops
    weight_sparsity_map: list of strings
      comma separated list of weight variable name:target sparsity pairs.
      For layers/weights not in this list, sparsity as specified by the
      target_sparsity hyperparameter is used.
      Eg. [conv1:0.9,conv2/kernel:0.8]
    threshold_decay: float
      the decay factor to use for exponential decay of the thresholds
    pruning_frequency: integer
      how often the masks should be updated (in # of global_steps)
    nbins: integer
      number of bins to use for histogram computation
    block_height: integer
      number of rows in a block (defaults to 1)
    block_width: integer
      number of cols in a block (defaults to 1)
    block_pooling_function: string
      whether to perform average (AVG) or max (MAX) pooling in the block
      (default: AVG)
    initial_sparsity: float
      initial sparsity value
    target_sparsity: float
      target sparsity value
    sparsity_function_begin_step: integer
      the global step at which the gradual sparsity function begins to
      take effect
    sparsity_function_end_step: integer
      the global step used as the end point for the gradual sparsity function
    sparsity_function_exponent: float
      exponent = 1 is linearly varying sparsity between initial and final.
      exponent > 1 varies more slowly towards the end than the beginning
    use_tpu: bool
      indicates whether to use TPU

  We use the following sparsity function:

    num_steps = (sparsity_function_end_step -
                 sparsity_function_begin_step) / pruning_frequency
    sparsity(step) = (initial_sparsity - target_sparsity) *
                     [1 - step / (num_steps - 1)]**exponent + target_sparsity

  Args:
    None

  Returns:
    tf.HParams object initialized to default values
  """
  return hparam.HParams(
      name='model_pruning',
      begin_pruning_step=0,
      end_pruning_step=-1,
      weight_sparsity_map=[''],
      threshold_decay=0.0,
      pruning_frequency=10,
      nbins=256,
      block_height=1,
      block_width=1,
      block_pooling_function='AVG',
      initial_sparsity=0.0,
      target_sparsity=0.5,
      sparsity_function_begin_step=0,
      sparsity_function_end_step=100,
      sparsity_function_exponent=3,
      use_tpu=False)
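# A minimal sketch (plain Python, illustrative only, not part of the pruning
# library) of the gradual sparsity schedule described in the docstring above.
# `step` counts pruning updates, i.e. how many times the mask has been
# updated since sparsity_function_begin_step; all names are hypothetical.
def gradual_sparsity(step,
                     initial_sparsity=0.0,
                     target_sparsity=0.5,
                     begin_step=0,
                     end_step=100,
                     pruning_frequency=10,
                     exponent=3):
  num_steps = (end_step - begin_step) // pruning_frequency
  step = min(max(step, 0), num_steps - 1)
  return ((initial_sparsity - target_sparsity) *
          (1.0 - step / (num_steps - 1))**exponent + target_sparsity)

# With the defaults above, sparsity ramps monotonically from 0.0 to 0.5:
# gradual_sparsity(0) == 0.0 and gradual_sparsity(9) == 0.5.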
parser.add_argument(
    '--eval-steps',
    help='Number of steps to run evaluation for at each checkpoint',
    default=100,
    type=int)
args = parser.parse_args()

# Set python level verbosity
tf.logging.set_verbosity(args.verbosity)
# Set C++ Graph Execution level verbosity (integer division, so the value
# renders as e.g. '2' rather than '2.0')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
    tf.logging.__dict__[args.verbosity] // 10)

train_files = []
tflist = file_io.list_directory(args.train_files_dir)
for x in tflist:
  if args.train_files_prefix in x:
    train_files.append(os.path.join(args.train_files_dir, x))
print("train files list: %s" % train_files)

eval_files = []
eflist = file_io.list_directory(args.eval_files_dir)
for x in eflist:
  if args.eval_files_prefix in x:
    eval_files.append(os.path.join(args.eval_files_dir, x))
print("eval files list: %s" % eval_files)

# Run the training job
hparams = hparam.HParams(**args.__dict__)
run_experiment(train_files, eval_files, hparams)
def testSomeValues(self):
  hparams = hparam.HParams(aaa=1, b=2.0, c_c='relu6', d='/a/b=c/d')
  self.assertDictEqual({
      'aaa': 1,
      'b': 2.0,
      'c_c': 'relu6',
      'd': '/a/b=c/d'
  }, hparams.values())
  expected_str = ('[(\'aaa\', 1), (\'b\', 2.0), (\'c_c\', \'relu6\'), '
                  '(\'d\', \'/a/b=c/d\')]')
  self.assertEqual(expected_str, hparams.__str__())
  self.assertEqual(expected_str, str(hparams))
  self.assertEqual(1, hparams.aaa)
  self.assertEqual(2.0, hparams.b)
  self.assertEqual('relu6', hparams.c_c)
  self.assertEqual('/a/b=c/d', hparams.d)

  hparams.parse('aaa=12')
  self.assertDictEqual({
      'aaa': 12,
      'b': 2.0,
      'c_c': 'relu6',
      'd': '/a/b=c/d'
  }, hparams.values())
  self.assertEqual(12, hparams.aaa)
  self.assertEqual(2.0, hparams.b)
  self.assertEqual('relu6', hparams.c_c)
  self.assertEqual('/a/b=c/d', hparams.d)

  hparams.parse('c_c=relu4, b=-2.0e10')
  self.assertDictEqual({
      'aaa': 12,
      'b': -2.0e10,
      'c_c': 'relu4',
      'd': '/a/b=c/d'
  }, hparams.values())
  self.assertEqual(12, hparams.aaa)
  self.assertEqual(-2.0e10, hparams.b)
  self.assertEqual('relu4', hparams.c_c)
  self.assertEqual('/a/b=c/d', hparams.d)

  hparams.parse('c_c=,b=0,')
  self.assertDictEqual({
      'aaa': 12,
      'b': 0,
      'c_c': '',
      'd': '/a/b=c/d'
  }, hparams.values())
  self.assertEqual(12, hparams.aaa)
  self.assertEqual(0.0, hparams.b)
  self.assertEqual('', hparams.c_c)
  self.assertEqual('/a/b=c/d', hparams.d)

  hparams.parse('c_c=2.3",b=+2,')
  self.assertEqual(2.0, hparams.b)
  self.assertEqual('2.3"', hparams.c_c)

  hparams.parse('d=/a/b/c/d,aaa=11,')
  self.assertEqual(11, hparams.aaa)
  self.assertEqual(2.0, hparams.b)
  self.assertEqual('2.3"', hparams.c_c)
  self.assertEqual('/a/b/c/d', hparams.d)

  hparams.parse('b=1.5,d=/a=b/c/d,aaa=10,')
  self.assertEqual(10, hparams.aaa)
  self.assertEqual(1.5, hparams.b)
  self.assertEqual('2.3"', hparams.c_c)
  self.assertEqual('/a=b/c/d', hparams.d)

  with self.assertRaisesRegexp(ValueError, 'Unknown hyperparameter'):
    hparams.parse('x=123')
  with self.assertRaisesRegexp(ValueError, 'Could not parse'):
    hparams.parse('aaa=poipoi')
  with self.assertRaisesRegexp(ValueError, 'Could not parse'):
    hparams.parse('aaa=1.0')
  with self.assertRaisesRegexp(ValueError, 'Could not parse'):
    hparams.parse('b=12x')
  with self.assertRaisesRegexp(ValueError, 'Could not parse'):
    hparams.parse('b=relu')
  with self.assertRaisesRegexp(ValueError, 'Must not pass a list'):
    hparams.parse('aaa=[123]')
  self.assertEqual(10, hparams.aaa)
  self.assertEqual(1.5, hparams.b)
  self.assertEqual('2.3"', hparams.c_c)
  self.assertEqual('/a=b/c/d', hparams.d)

  # Exports to proto.
  hparam_def = hparams.to_proto()
  # Imports from proto.
  hparams2 = hparam.HParams(hparam_def=hparam_def)
  # Verifies that all hparams are restored.
  self.assertEqual(10, hparams2.aaa)
  self.assertEqual(1.5, hparams2.b)
  self.assertEqual('2.3"', hparams2.c_c)
  self.assertEqual('/a=b/c/d', hparams2.d)
x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False) estimator = tf.estimator.Estimator(model_fn=model.solution) steps_per_eval = int(model.get_training_steps() / params.eval_steps) for _ in range(params.eval_steps): estimator.train(train_input_fn, steps=steps_per_eval) estimator.evaluate(eval_input_fn) if __name__ == "__main__": PARSER = argparse.ArgumentParser() PARSER.add_argument( '--eval-steps', help='Number of steps to run evaluation for at each checkpoint', default=1, type=int ) ARGS = PARSER.parse_args() tf.logging.set_verbosity('INFO') os.environ['TF_CPP_MIN_LOG_LEVEL'] = "0" #os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(tf.logging.__dict__['INFO'] / 10) HPARAMS = hparam.HParams(**ARGS.__dict__) train_model(HPARAMS)
parser.add_argument('-b', '--batch-size',
                    help='Training batch size', default=200, type=int)
parser.add_argument('-t', '--step-rate',
                    help='Step rate', default=1e-3, type=float)
parser.add_argument('-x', '--max-steps',
                    help='Max training steps', default=20000, type=int)
parser.add_argument('--configure', default=None,
                    help="Model structure configure json file.")
parser.add_argument('-k', '--kmer',
                    help='K-mer length', default=1, type=int)
parser.add_argument('-r', '--retrain',
                    help='Flag if retrain the model', action='store_true')
args = parser.parse_args()
run(hparam.HParams(**args.__dict__))
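# Aside (a sketch, not part of the script above): argparse's `type=bool` is
# a common pitfall, since bool('False') == True and every non-empty string
# parses as True. When a flag must still accept an explicit true/false value
# on the command line, a small converter like this hypothetical `str2bool`
# is one workaround.
import argparse

def str2bool(value):
  """Parses a 'true'/'false'-style string into a bool for argparse."""
  if value.lower() in ('true', 't', 'yes', '1'):
    return True
  if value.lower() in ('false', 'f', 'no', '0'):
    return False
  raise argparse.ArgumentTypeError('Expected a boolean, got %r' % value)

# Usage: parser.add_argument('--retrain', type=str2bool, default=False)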
def generate_experiment_fn(**experiment_args):
  """Create an experiment function.

  See command line help text for description of args.

  Args:
    experiment_args: keyword arguments to be passed through to experiment
      See `tf.contrib.learn.Experiment` for full args.

  Returns:
    A function:
      (tf.contrib.learn.RunConfig, tf.contrib.training.HParams) -> Experiment

    This function is used by learn_runner to create an Experiment which
    executes model code provided in the form of an Estimator and
    input functions.
  """
  def _experiment_fn(run_config, hparams):
    # num_epochs can control duration if train_steps isn't
    # passed to Experiment
    train_input = lambda: model.generate_input_fn(
        hparams.train_files,
        num_epochs=hparams.num_epochs,
        batch_size=hparams.train_batch_size,
    )
    # Don't shuffle evaluation data
    eval_input = lambda: model.generate_input_fn(
        hparams.eval_files,
        batch_size=hparams.eval_batch_size,
        shuffle=False
    )
    return tf.contrib.learn.Experiment(
        model.build_estimator(
            embedding_size=hparams.embedding_size,
            # Construct layers sizes with exponential decay
            hidden_units=[
                max(2, int(hparams.first_layer_size *
                           hparams.scale_factor**i))
                for i in range(hparams.num_layers)
            ],
            config=run_config
        ),
        train_input_fn=train_input,
        eval_input_fn=eval_input,
        **experiment_args
    )
  return _experiment_fn


# Set python level verbosity
# tf.logging.set_verbosity(args.verbosity)

# Set C++ Graph Execution level verbosity
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
#     tf.logging.__dict__[args.verbosity] / 10)

# If job_dir_reuse is False then remove the job_dir if it exists
# if not args.reuse_job_dir:
#   if tf.gfile.Exists(args.job_dir):
#     tf.gfile.DeleteRecursively(args.job_dir)
#     tf.logging.info("Deleted job_dir {} to avoid re-use".format(args.job_dir))
#   else:
#     tf.logging.info("No job_dir available to delete")
# else:
#   tf.logging.info("Reusing job_dir {} if it exists".format(args.job_dir))

# Run the training job
# learn_runner pulls configuration information from environment
# variables using tf.learn.RunConfig and uses this configuration
# to conditionally execute Experiment, or param server code
learn_runner.run(
    generate_experiment_fn(
        train_steps=FLAGS.max_steps,
        eval_steps=FLAGS.eval_steps,
    ),
    run_config=run_config.RunConfig(model_dir=FLAGS.job_dir),
    hparams=hparam.HParams(**FLAGS.__dict__)
)
class SpectrumAugmenter():
  """Performs data augmentation according to the SpecAugment paper.

  https://arxiv.org/pdf/1904.08779.pdf
  """

  params = hparam.HParams(
      # Maximum number of frequency bins of frequency masking.
      freq_mask_max_bins=15,
      # Number of times we apply masking on the frequency axis.
      freq_mask_count=1,
      # Maximum number of frames of time masking. Overridden when
      # use_dynamic_time_mask_max_frames = True.
      time_mask_max_frames=50,
      # Number of times we apply masking on the time axis. Acts as an upper
      # bound when time_masks_per_frame > 0.
      time_mask_count=1,
      # If true, time_mask_max_frames is determined by
      # time_mask_max_ratio * utterance_length.
      use_dynamic_time_mask_max_frames=False,
      # Maximum portion allowed for time masking.
      time_mask_max_ratio=1.0,
      # Ratio of the number of time masks to be applied against the number
      # of frames. If > 0, the multiplicity of the time mask is determined
      # by min(time_masks_per_frame * utterance_length, time_mask_count).
      time_masks_per_frame=0.0,
      # To be set to either `dynamic` or `static`. If `dynamic`, the time
      # warp bound is determined by time_warp_max_ratio * utterance_length.
      # If `static`, the time warp bound is determined by
      # min(time_warp_max_frames, time_warp_max_ratio * utterance_length).
      time_warp_bound='static',
      # Maximum number of frames for shifting in time warping.
      time_warp_max_frames=0,
      # Maximum portion of frames for shifting in time warping.
      time_warp_max_ratio=0.0,
      # Whether to noisify the time masked region.
      use_noise=False,
      # Use Gaussian distribution for noise.
      gaussian_noise=False,
      # Whether to unstack features before applying SpecAugment.
      unstack=False,
      # Number of frames stacked on top of each other.
      stack_height=3,
      # Whether to use stateless random TensorFlow ops, with seeds
      # determined by the input features. This feature is necessary for
      # applications including federated learning.
      use_input_dependent_random_seed=False,
      # Datatype to use.
      dtype=tf.float32,
      # Activations datatype to use.
      fprop_dtype=None,
      # Random seed for deterministic unittests.
      random_seed=None,
  )

  def __init__(self, config=None):
    if config is not None:
      self.params.override_from_dict(config)

  def EinsumBBmBm(self, a, b, name=None):
    return tf.einsum('b,bm->bm', a, b, name=name)

  def EinsumBmtBmBt(self, a, b, name=None):
    return tf.einsum('bmt,bm->bt', a, b, name=name)

  def EinsumBxycByBxyc(self, a, b, name=None):
    return tf.einsum('bxyc,by->bxyc', a, b, name=name)

  def EinsumBxycBxBxyc(self, a, b, name=None):
    return tf.einsum('bxyc,bx->bxyc', a, b, name=name)

  def EinsumBxyBxBxy(self, a, b, name=None):
    return tf.einsum('bxy,bx->bxy', a, b, name=name)

  def EinsumBxycBzxBzyc(self, a, b, name=None):
    return tf.einsum('bxyc,bzx->bzyc', a, b, name=name)

  def _GetMask(self,
               batch_size,
               choose_range,
               mask_size,
               global_seed,
               max_length=None,
               masks_per_frame=0.0,
               multiplicity=1,
               dtype=tf.float32,
               max_ratio=1.0):
    """Returns fixed size multi-masks starting from random positions.

    A multi-mask is a mask obtained by applying multiple masks.

    When max_length is given, this function:
      1) Samples random mask lengths less than max_length with shape
         (batch_size, multiplicity).
      2) Truncates lengths to a max of (choose_range * max_ratio), so that
         each mask is fully contained within the corresponding sequence.
      3) Samples random start points of shape (batch_size, multiplicity)
         within (choose_range - lengths).
      4) For each batch, constructs multiple masks (whose number is given
         by the multiplicity).
      5) Returns a mask of shape (batch_size, mask_size), obtained by
         composing the masks constructed in step 4). If masks_per_frame > 0,
         the number of composed masks is given by
         min(masks_per_frame * choose_range, multiplicity); otherwise, all
         the masks are composed. The masked regions are set to zero.

    When max_length is not given, this function:
      1) Samples random mask lengths less than (choose_range * max_ratio)
         with shape (batch_size, multiplicity).
      2) Proceeds to steps 3), 4) and 5) of the above.

    Args:
      batch_size: Batch size. Integer number.
      choose_range: Range within which the masked entries must lie. Tensor
        of shape (batch_size,).
      mask_size: Size of the mask. Integer number.
      global_seed: an integer seed tensor for stateless random ops.
      max_length: Maximum number of allowed consecutive masked entries.
        Integer number or None.
      masks_per_frame: Number of masks per frame. Float number. If > 0, the
        multiplicity of the mask is set to be masks_per_frame * choose_range.
      multiplicity: Maximum number of total masks. Integer number.
      dtype: Data type.
      max_ratio: Maximum portion of the entire range allowed to be masked.
        Float number.

    Returns:
      mask: a fixed size multi-mask starting from a random position with
        shape (batch_size, mask_size).
    """
    p = self.params
    # Non-empty random seed values are only used for testing or when using
    # stateless random ops. seed_1 and seed_2 are set separately to avoid
    # correlation of mask size and mask position.
    if p.use_input_dependent_random_seed:
      seed_1 = global_seed + 1
      seed_2 = global_seed + 2
    elif p.random_seed:
      seed_1 = p.random_seed + 1
      seed_2 = 2 * p.random_seed
    else:
      seed_1 = p.random_seed
      seed_2 = p.random_seed
    # Sample lengths for multiple masks.
    if max_length and max_length > 0:
      max_length = tf.broadcast_to(tf.cast(max_length, dtype), (batch_size,))
    else:
      max_length = tf.cast(choose_range, dtype=dtype) * max_ratio
    random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)
    masked_portion = random_uniform(
        shape=(batch_size, multiplicity),
        minval=0.0,
        maxval=1.0,
        dtype=dtype,
        seed=seed_1)
    masked_frame_size = self.EinsumBBmBm(max_length, masked_portion)
    masked_frame_size = tf.cast(masked_frame_size, dtype=tf.int32)
    # Make sure the sampled length is smaller than max_ratio * length_bound.
    # Note that sampling in this way is biased
    # (shorter sequences may be over-masked).
    choose_range = tf.expand_dims(choose_range, -1)
    choose_range = tf.tile(choose_range, [1, multiplicity])
    length_bound = tf.cast(choose_range, dtype=dtype)
    length_bound = tf.cast(max_ratio * length_bound, dtype=tf.int32)
    length = tf.minimum(masked_frame_size, tf.maximum(length_bound, 1))
    # Choose starting point.
    random_start = random_uniform(
        shape=(batch_size, multiplicity), maxval=1.0, seed=seed_2)
    start_with_in_valid_range = random_start * tf.cast(
        (choose_range - length + 1), dtype=dtype)
    start = tf.cast(start_with_in_valid_range, tf.int32)
    end = start + length - 1
    # Shift starting and end point by small value.
    delta = tf.constant(0.1)
    start = tf.expand_dims(tf.cast(start, dtype) - delta, -1)
    start = tf.tile(start, [1, 1, mask_size])
    end = tf.expand_dims(tf.cast(end, dtype) + delta, -1)
    end = tf.tile(end, [1, 1, mask_size])
    # Construct pre-mask of shape (batch_size, multiplicity, mask_size).
    diagonal = tf.expand_dims(
        tf.expand_dims(tf.cast(tf.range(mask_size), dtype=dtype), 0), 0)
    diagonal = tf.tile(diagonal, [batch_size, multiplicity, 1])
    pre_mask = tf.cast(
        tf.math.logical_and(diagonal < end, diagonal > start), dtype=dtype)
    # Sum masks with appropriate multiplicity.
    if masks_per_frame > 0:
      multiplicity_weights = tf.tile(
          tf.expand_dims(tf.range(multiplicity, dtype=dtype), 0),
          [batch_size, 1])
      multiplicity_tensor = masks_per_frame * tf.cast(choose_range,
                                                      dtype=dtype)
      multiplicity_weights = tf.cast(
          multiplicity_weights < multiplicity_tensor, dtype=dtype)
      pre_mask = self.EinsumBmtBmBt(pre_mask, multiplicity_weights)
    else:
      pre_mask = tf.reduce_sum(pre_mask, 1)
    mask = tf.cast(1.0 - tf.cast(pre_mask > 0, dtype=dtype), dtype=dtype)

    if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
      mask = tf.cast(mask, p.fprop_dtype)
    return mask

  def _GetWarpMatrix(self,
                     batch_size,
                     choose_range,
                     matrix_size,
                     global_seed,
                     max_warp_frames=None,
                     dtype=tf.float32,
                     max_ratio=1.0):
    """Returns warp matrices starting from random positions.

    When max_warp_frames != None, this function:
      1) Samples random warp displacements from the interval
         [-max_warp_frames, max_warp_frames) to yield a shift tensor with
         shape (batch_size,).
      2) Truncates lengths to a maximum magnitude of
         (choose_range * max_ratio), so that each shift is fully contained
         within the corresponding sequence.
      3) Samples random origin points of shape (batch_size,) within
         [shift, choose_range - shift).
      4) Returns a batch of 1-D linear maps that fix the boundary points
         and shift the origin point by the shift.

    When max_warp_frames == None, this function:
      1) Samples random warp displacements with magnitudes less than
         (choose_range * max_ratio) to yield a shift tensor with shape
         (batch_size,).
      2) Proceeds through steps 3) and 4).

    Args:
      batch_size: Batch size. Integer number.
      choose_range: Range within which the warp reference points must lie.
        Tensor of shape (batch_size,).
      matrix_size: Dimension of the vector space the warp matrix is applied
        to. Integer number.
      global_seed: an integer seed tensor for stateless random ops.
      max_warp_frames: Upper-bound on the warp distance. Integer or None.
      dtype: Data type.
      max_ratio: Maximum ratio between the shift distance and choose_range.
        Float number.

    Returns:
      warp_matrix: An array of fixed size warp matrices with shape
        (batch_size, matrix_size, matrix_size).
    """
    p = self.params
    # Non-empty random seed values are only used for testing or when using
    # stateless random ops. seed_3, seed_4, and seed_5 are set separately to
    # avoid correlation of warp magnitude and origin position.
    if p.use_input_dependent_random_seed:
      seed_3 = global_seed + 3
      seed_4 = global_seed + 4
      seed_5 = global_seed + 5
    elif p.random_seed:
      seed_3 = p.random_seed - 1
      seed_4 = p.random_seed - 1
      seed_5 = 2 * p.random_seed + 1
    else:
      seed_3 = p.random_seed
      seed_4 = p.random_seed
      seed_5 = p.random_seed
    choose_range_dtype = tf.cast(choose_range, dtype=dtype)
    length_upper_bound = tf.cast(max_ratio * choose_range_dtype,
                                 dtype=tf.int32)
    # Set shift length.
    random_uniform = _random_uniform_op(p.use_input_dependent_random_seed)
    if max_warp_frames and max_warp_frames > 0:
      shift = random_uniform(
          shape=(batch_size,),
          minval=-1 * max_warp_frames,
          maxval=max_warp_frames + 1,
          dtype=tf.int32,
          seed=seed_3)
    else:
      random_ratio = random_uniform(
          shape=(batch_size,),
          minval=-1.0,
          maxval=1.0,
          dtype=dtype,
          seed=seed_4)
      shift = tf.cast(
          random_ratio * tf.cast(length_upper_bound, dtype=dtype), tf.int32)
    # Make sure the sampled shift is smaller than max_ratio * length_bound.
    # Note that sampling in this way is biased
    # (shorter sequences may be over-warped).
    final_shift = tf.maximum(-length_upper_bound,
                             tf.minimum(shift, length_upper_bound))
    # Choose origin anchor point.
    mid_range = tf.maximum(tf.cast(choose_range, dtype=tf.int32) - 2, 0)
    random_origin = random_uniform(
        shape=(batch_size,), maxval=1.0, seed=seed_5)
    origin_with_in_valid_range = random_origin * tf.cast(mid_range,
                                                         dtype=dtype)
    origin = tf.cast(origin_with_in_valid_range, tf.int32) + 1
    # Set destination point of the origin anchor point under the warp map.
    destination = origin + final_shift
    # Cast origin and destination.
    origin = tf.cast(origin, dtype=dtype)
    destination = tf.cast(destination, dtype=dtype)
    return self._ConstructWarpMatrix(
        batch_size=batch_size,
        matrix_size=matrix_size,
        origin=origin,
        destination=destination,
        choose_range=choose_range_dtype,
        dtype=dtype)

  def _ConstructWarpMatrix(self, batch_size, matrix_size, origin,
                           destination, choose_range, dtype):
    """Returns warp matrices according to origin, destination and choose_range.

    This function constructs a batch of warp matrices which map the batch
    of origin points to the batch of destination points with fixed boundary
    coordinates at 0 and choose_range.

    The warping function, defined by the origin anchor point `origin`, the
    destination of the origin anchor point `destination` and the length of
    the domain in the warping axis `choose_range`, is a piecewise linear
    map that fixes the points 0 and `choose_range` and maps `origin` to
    `destination`.

    For the warping matrix to be non-singular, destination must lie in the
    range 1 <= destination <= choose_range - 1, so a destination out of
    this range is adjusted to be in this range before the warping matrix
    is constructed.

    The warping map can be explicitly written by first defining the slopes:
      1) slope_0 = origin / destination.
      2) slope_1 = (choose_range - origin) / (choose_range - destination).
      3) slope_2 = 1.0.

    Then the origin point orig_i of the mapped coordinate i is given by:
      1) i < destination: orig_i = slope_0 * i.
      2) destination <= i < choose_range:
         orig_i = slope_1 * i - (slope_1 - slope_0) * destination.
      3) i >= choose_range: orig_i = i.

    Denoting n_i = ceil(orig_i), the warp matrix element warp[i][j] is
    given by:
      1) j = n_i: 1 - n_i + orig_i.
      2) j = n_i - 1: n_i - orig_i.
      3) Otherwise: 0.

    Applying the warp matrix to an array of pixels, i.e.,
    warped_pixel[i] = sum_j warp[i][j] * pixel[j], one would get
    warped_pixel[i] = (n_i - orig_i) pixel[n_i - 1]
                      + (1 - n_i + orig_i) pixel[n_i].

    Args:
      batch_size: Batch size. Integer number.
      matrix_size: Dimension of the vector space the warp matrix is applied
        to. Integer number.
      origin: Origin anchor point for warping. Tensor of shape
        (batch_size,) and data type dtype.
      destination: Destination of the origin anchor point upon warping.
        Tensor of shape (batch_size,) and data type dtype.
      choose_range: Range within which the warp reference points must lie.
        Tensor of shape (batch_size,) and data type dtype.
      dtype: Data type of origin, destination, choose_range and the output
        warp matrix.

    Returns:
      warp_matrix: An array of fixed size warp matrices with shape
        (batch_size, matrix_size, matrix_size).
    """
    p = self.params
    # Entries of destination must be in the range
    # 1 <= destination <= choose_range - 1
    # for the warp matrix to have non-singular values.
    destination = tf.minimum(tf.maximum(destination, 1.0),
                             choose_range - 1.0)
    # Construct a piecewise linear function fixing the boundary points
    # specified by zero, choose_range and matrix size, and mapping the
    # origin anchor point to the destination.
    destination_bc = tf.broadcast_to(destination, (matrix_size, batch_size))
    destination_bc = tf.transpose(destination_bc)
    choose_range_bc = tf.broadcast_to(choose_range,
                                      (matrix_size, batch_size))
    choose_range_bc = tf.transpose(choose_range_bc)

    # Slopes of the piecewise linear function.
    slope_0 = origin / destination
    slope_1 = (choose_range - origin) / (choose_range - destination)
    slope_2 = 1.0

    # x is a batch of origin matrices.
    # The origin matrix is the matrix such that
    # origin[i][j] = origin coordinate of coordinate i for the warp map.
    # Denoting the destination of the origin anchor point in the warp map
    # as "dest", the origin coordinate of point i is given by:
    # 1) i < dest: slope_0 * i.
    # 2) dest <= i < choose_range: slope_1 * i - (slope_1 - slope_0) * dest.
    # 3) i >= choose_range: i.
    x = tf.broadcast_to(tf.cast(tf.range(matrix_size), dtype=dtype),
                        (batch_size, matrix_size))
    x = (self.EinsumBBmBm(slope_0, x) +
         self.EinsumBBmBm(slope_1 - slope_0,
                          tf.nn.relu(x - destination_bc)) +
         self.EinsumBBmBm(slope_2 - slope_1,
                          tf.nn.relu(x - choose_range_bc)))
    x = tf.broadcast_to(x, (matrix_size, batch_size, matrix_size))
    x = tf.transpose(x, perm=[1, 2, 0])

    # y is a batch of coordinate matrices.
    # A coordinate matrix is a matrix such that coordinate[i][j] = j.
    y = tf.broadcast_to(tf.cast(tf.range(matrix_size), dtype=dtype),
                        (batch_size, matrix_size, matrix_size))
    # The warp matrix is obtained by applying the hat function element-wise
    # to (x - y). Denoting the origin point of i under the warp map as
    # orig_i, and n_i = ceil(orig_i), the warp matrix element warp[i][j]
    # is given by:
    # 1) j = n_i: 1 - n_i + orig_i.
    # 2) j = n_i - 1: n_i - orig_i.
    # 3) Otherwise: 0.
    # Applying the warp matrix to pixels, i.e.,
    # warped_pixel[i] = sum_j warp[i][j] * original_pixel[j], one would get
    # warped_pixel[i] = (n_i - orig_i) * original_pixel[n_i - 1]
    #                   + (1 - n_i + orig_i) * original_pixel[n_i].
    warp_matrix = x - y
    warp_matrix = _hat(warp_matrix)
    if p.fprop_dtype is not None and p.fprop_dtype != dtype:
      warp_matrix = tf.cast(warp_matrix, p.fprop_dtype)
    return warp_matrix

  def _FrequencyMask(self, inputs, global_seed, dtype=tf.float32):
    """Applies frequency masking with given degree to inputs.

    Args:
      inputs: Batch of input features of shape
        (batch_size, time_length, num_freq, channels).
      global_seed: an integer seed tensor for stateless random ops.
      dtype: Data type.

    Returns:
      Inputs with random frequency masking applied.
    """
    p = self.params
    # Mask parameters.
    freq_mask_max_bins = p.freq_mask_max_bins
    multiplicity = p.freq_mask_count
    # If masking length or count is zero, do nothing.
    if freq_mask_max_bins == 0 or multiplicity == 0:
      return inputs
    # Arguments to pass to the mask generator.
    batch_size, _, num_freq, _ = GetShape(inputs)
    choose_range = tf.cast(tf.broadcast_to(num_freq, (batch_size,)),
                           dtype=tf.int32)
    # Create masks in the frequency direction and apply them.
    block_arrays = self._GetMask(
        tf.shape(inputs)[0],
        choose_range=choose_range,
        mask_size=num_freq,
        global_seed=global_seed,
        max_length=freq_mask_max_bins,
        masks_per_frame=0.0,
        multiplicity=multiplicity,
        dtype=dtype,
        max_ratio=1.0)
    return self.EinsumBxycByBxyc(inputs, block_arrays)

  def _TimeMask(self,
                inputs,
                seq_lengths,
                global_seed,
                noisify=False,
                gaussian_noise=False,
                dtype=tf.float32):
    """Applies time masking with given degree to inputs.

    Args:
      inputs: Batch of input features of shape
        (batch_size, time_length, num_freq, channels).
      seq_lengths: The actual sequence lengths, of shape (batch_size,),
        within which masks are sampled.
      global_seed: an integer seed tensor for stateless random ops.
      noisify: Whether to noisify the masked out regions.
      gaussian_noise: Whether to use gaussian noise when noisifying.
      dtype: Data type.

    Returns:
      Inputs with random time masking applied.
    """
    p = self.params

    # Get time masking parameters.
    time_mask_max_frames = p.time_mask_max_frames
    time_masks_per_frame = p.time_masks_per_frame
    use_dynamic_time_mask_max_frames = p.use_dynamic_time_mask_max_frames
    multiplicity = p.time_mask_count
    max_ratio = p.time_mask_max_ratio

    # If the maximum mask length is zero, do nothing.
    if ((time_mask_max_frames == 0 and
         not use_dynamic_time_mask_max_frames) or max_ratio <= 0.0):
      return inputs
    if multiplicity == 0:
      return inputs
    seq_lengths = tf.cast(seq_lengths, tf.int32)
    batch_size, time_length, _, _ = GetShape(inputs)

    # When using a dynamic time mask size, discard the upper bound on the
    # maximum allowed frames for the time mask.
    if use_dynamic_time_mask_max_frames:
      time_mask_max_frames = None
    # Create masks in the time direction and apply them.
    block_arrays = self._GetMask(
        batch_size,
        choose_range=seq_lengths,
        mask_size=time_length,
        global_seed=global_seed,
        max_length=time_mask_max_frames,
        masks_per_frame=time_masks_per_frame,
        multiplicity=multiplicity,
        dtype=dtype,
        max_ratio=max_ratio)

    # Non-empty random seed values are only used for testing or when using
    # stateless random ops. seed_6 and seed_7 are set separately to avoid
    # correlation of the noise factor and noise values.
    if p.use_input_dependent_random_seed:
      seed_6 = global_seed + 6
      seed_7 = global_seed + 7
    else:
      seed_6 = p.random_seed
      seed_7 = p.random_seed

    outputs = self.EinsumBxycBxBxyc(inputs, block_arrays,
                                    name='einsum_formasking')
    if noisify:
      # Sample noise with standard deviation factor * 0.1 + 0.0001.
      # TODO(ngyuzh): Make sure this won't affect EOS.
      if gaussian_noise:
        stddev = 1.0
      else:
        random_uniform = _random_uniform_op(
            p.use_input_dependent_random_seed)
        factor = random_uniform(
            shape=(), minval=1.0, maxval=2.0, dtype=dtype, seed=seed_6)
        stddev = factor * 0.1 + 0.0001
      random_normal = _random_normal_op(p.use_input_dependent_random_seed)
      noise = random_normal(
          shape=[tf.shape(inputs)[0],
                 tf.shape(inputs)[1],
                 tf.shape(inputs)[2]],
          stddev=stddev,
          seed=seed_7)
      if p.fprop_dtype is not None and p.fprop_dtype != p.dtype:
        noise = tf.cast(noise, p.fprop_dtype)
      outputs_mask = self.EinsumBxyBxBxy(noise, 1.0 - block_arrays,
                                         name='einsum_fornoisymasking')
      outputs = outputs + tf.expand_dims(outputs_mask, -1)
    return outputs

  def _TimeWarp(self, inputs, seq_lengths, global_seed, dtype=tf.float32):
    """Applies time warping with given degree to inputs.

    Args:
      inputs: Batch of input features of shape
        (batch_size, time_length, num_freq, channels).
      seq_lengths: The actual sequence lengths, of shape (batch_size,),
        within which warp reference points are sampled.
      global_seed: an integer seed tensor for stateless random ops.
      dtype: Data type.

    Returns:
      Inputs with random time warping applied.
    """
    p = self.params
    batch_size, time_length, _, _ = GetShape(inputs)

    # Get parameters for warping.
    time_warp_max_frames = p.time_warp_max_frames
    max_ratio = p.time_warp_max_ratio
    time_warp_bound = p.time_warp_bound
    assert time_warp_bound in ('static', 'dynamic')

    # If the maximum warp length is zero, do nothing.
    if ((time_warp_max_frames == 0 and time_warp_bound == 'static') or
        max_ratio <= 0.0):
      return inputs
    seq_lengths = tf.cast(seq_lengths, tf.int32)

    # Discard the upper bound on time-warp frames when dynamic time
    # warping is used.
    if time_warp_bound == 'dynamic':
      time_warp_max_frames = None
    # Create the warping matrix in the time direction and apply it.
    warp_matrix = self._GetWarpMatrix(
        batch_size,
        choose_range=seq_lengths,
        matrix_size=time_length,
        global_seed=global_seed,
        max_warp_frames=time_warp_max_frames,
        dtype=dtype,
        max_ratio=max_ratio)
    return self.EinsumBxycBzxBzyc(inputs, warp_matrix,
                                  name='einsum_forwarping')

  def UnstackFeatures(self, src_inputs, src_paddings):
    """Unstacks src_input and src_paddings based off stack height."""
    sh = self.params.stack_height
    bs, old_series_length, _, channels = GetShape(src_inputs)
    unstacked_series_length = old_series_length * sh
    src_inputs = tf.reshape(src_inputs,
                            [bs, unstacked_series_length, -1, channels])
    content = 1 - src_paddings
    lengths = tf.cast(sh * tf.reduce_sum(content, axis=1), tf.int32)
    mask = tf.sequence_mask(lengths, maxlen=unstacked_series_length)
    src_paddings = 1 - tf.cast(mask, tf.int32)
    return src_inputs, src_paddings

  def _AugmentationNetwork(self, series_length, inputs, paddings,
                           global_seed):
    """Returns augmented features.

    Args:
      series_length: Total length of time series.
      inputs: Batch of input features of shape
        (batch_size, time_length, num_freq, channels).
      paddings: Batch of padding vectors of shape
        (batch_size, time_length).
      global_seed: an integer seed tensor for stateless random ops.

    Returns:
      Batch of output features of shape
      (batch_size, time_length, num_freq, channels) obtained by applying
      random augmentations to inputs.
    """
    p = self.params
    dtype = p.dtype

    # Unstack the features.
    if p.unstack:
      inputs, paddings = self.UnstackFeatures(inputs, paddings)

    lengths = tf.reduce_sum(1 - paddings, 1)
    inputs = self._TimeWarp(inputs, lengths, global_seed=global_seed,
                            dtype=dtype)
    inputs = self._TimeMask(
        inputs,
        lengths,
        global_seed=global_seed,
        noisify=p.use_noise,
        gaussian_noise=p.gaussian_noise,
        dtype=dtype)
    inputs = self._FrequencyMask(inputs, global_seed=global_seed,
                                 dtype=dtype)

    # Restack the features after applying SpecAugment.
    if p.unstack:
      inputs = tf.reshape(
          inputs,
          [tf.shape(inputs)[0], series_length, -1, tf.shape(inputs)[3]])
    return inputs

  def __call__(self, inputs, seq_len):
    """Applies data augmentation by randomly masking the spectrum in inputs.

    Args:
      inputs: A tensor of shape [batch, time, freq].
      seq_len: The actual sequence lengths of shape [batch].

    Returns:
      augmented_inputs: A tensor of shape [batch, time, freq] obtained by
        applying random augmentations to inputs.
    """
    p = self.params
    paddings = 1 - tf.sequence_mask(
        seq_len, tf.shape(inputs)[1], dtype=tf.float32)
    inputs = tf.expand_dims(inputs, -1)

    # A tensor seed in case stateless random ops are needed.
    global_seed = None
    if p.use_input_dependent_random_seed:
      global_seed = _global_seed_from_inputs(inputs)

    batch_size, series_length, _, _ = GetShape(inputs)
    augmented_inputs = self._AugmentationNetwork(
        series_length, inputs, paddings, global_seed=global_seed)
    return tf.reshape(augmented_inputs, [batch_size, series_length, -1])
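# A minimal usage sketch for the SpectrumAugmenter above (illustrative; the
# shapes and override values are made up, and it assumes the helpers the
# class references -- GetShape, _hat, _random_uniform_op, _random_normal_op,
# _global_seed_from_inputs -- are defined in the same module).
features = tf.random.uniform([4, 120, 80])    # [batch, time, freq] log-mels
seq_len = tf.constant([120, 100, 90, 60])     # true length of each example

augmenter = SpectrumAugmenter(config={
    'freq_mask_max_bins': 27,    # wider frequency masks than the default
    'time_mask_max_frames': 40,
    'time_warp_max_frames': 5,
})
augmented = augmenter(features, seq_len)      # same shape as `features`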
class QTest(test.TestCase):
  hparams = hparam.HParams(
      learning_rate=1.25e-3,
      hidden_layers=[16, 16],
      initial_exploration=.5,
      discount=.99,
      exploration_decay_steps=256 // 16 * 25,
      exploration_decay_rate=.99,
      max_sequence_length=1,
      num_episodes=256,
      batch_size=16,
      num_iterations=100,
      assign_target_steps=10 * 16,
      huber_loss_delta=1.,
      num_quantiles=51)

  @test_util.skip_if(True)
  def test_q_ops_dqn(self):
    ops.reset_default_graph()
    np.random.seed(42)
    random_seed.set_random_seed(42)
    env = gym.make('CartPole-v0')
    env.seed(42)

    # Setup the policy and model
    global_step = training_util.get_or_create_global_step()
    deterministic_ph = array_ops.placeholder(
        dtypes.bool, [], name='deterministic')
    exploration_op = learning_rate_decay.exponential_decay(
        QTest.hparams.initial_exploration, global_step,
        QTest.hparams.exploration_decay_steps,
        QTest.hparams.exploration_decay_rate)

    state_distribution, state_ph = gym_ops.distribution_from_gym_space(
        env.observation_space, name='state_space')
    with variable_scope.variable_scope('logits'):
      action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
    action_distribution, action_value_op = gym_ops.distribution_from_gym_space(
        env.action_space, logits=[action_value_op], name='action_space')
    action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
        action_distribution, exploration_op, deterministic_ph))
    policy_variables = variables.trainable_variables(scope='logits')

    next_state_ph = shortcuts.placeholder_like(
        state_ph, name='next_state_space')
    with variable_scope.variable_scope('logits', reuse=True):
      next_action_value_op = mlp(next_state_ph, QTest.hparams.hidden_layers)
    next_action_distribution, next_action_value_op = (
        gym_ops.distribution_from_gym_space(
            env.action_space, logits=[next_action_value_op],
            name='action_space'))
    next_action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
        next_action_distribution, exploration_op, deterministic_ph))

    # Setup the dataset
    stream = streams.Uniform.from_distributions(
        state_distribution, action_distribution)
    replay_dataset = dataset.ReplayDataset(
        stream, max_sequence_length=QTest.hparams.max_sequence_length)
    replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
    replay_op = replay_dataset.make_one_shot_iterator().get_next()

    action_ph = array_ops.placeholder(
        stream.action_dtype, [None, None] + stream.action_shape,
        name='action')
    reward_ph = array_ops.placeholder(
        stream.reward_dtype, [None, None] + stream.reward_shape,
        name='reward')
    terminal_ph = array_ops.placeholder(
        dtypes.bool, [None, None], name='terminal')
    sequence_length_ph = array_ops.placeholder(
        dtypes.int32, [None, 1], name='sequence_length')
    sequence_length = array_ops.squeeze(sequence_length_ph, -1)

    q_value_op, expected_q_value_op = q_ops.expected_q_value(
        reward_ph,
        action_ph,
        action_value_op,
        next_action_value_op,
        weights=(1 - math_ops.cast(terminal_ph, reward_ph.dtype)),
        discount=QTest.hparams.discount)

    # mean_squared_error
    loss_op = math_ops.square(q_value_op - expected_q_value_op)
    loss_op = math_ops.reduce_mean(
        math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
            sequence_length, loss_op.dtype))
    optimizer = adam.AdamOptimizer(
        learning_rate=QTest.hparams.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=policy_variables)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      for iteration in range(QTest.hparams.num_iterations):
        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            stream=stream)

        while True:
          try:
            replay = sess.run(replay_op)
          except (errors_impl.InvalidArgumentError,
                  errors_impl.OutOfRangeError):
            break
          _, loss = sess.run(
              (train_op, loss_op),
              feed_dict={
                  state_ph: replay.state,
                  next_state_ph: replay.next_state,
                  action_ph: replay.action,
                  reward_ph: replay.reward,
                  terminal_ph: replay.terminal,
                  sequence_length_ph: replay.sequence_length,
              })

        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            deterministic=True, save_replay=False)
        print('average_rewards = {}'.format(
            rewards / QTest.hparams.num_episodes))

  # @test_util.skip_if(True)
  def test_q_ops_double_dqn(self):
    env = gym.make('CartPole-v0')
    ops.reset_default_graph()
    np.random.seed(42)
    random_seed.set_random_seed(42)
    env.seed(42)

    # Setup the policy and model
    global_step = training_util.get_or_create_global_step()
    deterministic_ph = array_ops.placeholder(
        dtypes.bool, [], name='deterministic')
    exploration_op = learning_rate_decay.exponential_decay(
        QTest.hparams.initial_exploration, global_step,
        QTest.hparams.exploration_decay_steps,
        QTest.hparams.exploration_decay_rate)

    state_distribution, state_ph = gym_ops.distribution_from_gym_space(
        env.observation_space, name='state_space')
    with variable_scope.variable_scope('logits'):
      action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
    action_distribution, action_value_op = gym_ops.distribution_from_gym_space(
        env.action_space, logits=[action_value_op], name='action_space')
    action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
        action_distribution, exploration_op, deterministic_ph))
    policy_variables = variables.trainable_variables(scope='logits')

    next_state_ph = shortcuts.placeholder_like(
        state_ph, name='next_state_space')
    with variable_scope.variable_scope('logits', reuse=True):
      next_action_value_op = mlp(next_state_ph, QTest.hparams.hidden_layers)
    next_action_distribution, next_action_value_op = (
        gym_ops.distribution_from_gym_space(
            env.action_space, logits=[next_action_value_op],
            name='action_space'))
    next_action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
        next_action_distribution, exploration_op, deterministic_ph))

    with variable_scope.variable_scope('target_logits'):
      target_next_action_value_op = mlp(next_state_ph,
                                        QTest.hparams.hidden_layers)
    target_next_action_distribution, target_next_action_value_op = (
        gym_ops.distribution_from_gym_space(
            env.action_space, logits=[target_next_action_value_op],
            name='action_space'))
    target_next_action_op = array_ops.squeeze(sampling_ops.epsilon_greedy(
        target_next_action_distribution, exploration_op, deterministic_ph))
    assign_target_op = shortcuts.assign_scope('logits', 'target_logits')

    # Setup the dataset
    stream = streams.Uniform.from_distributions(
        state_distribution, action_distribution)
    replay_dataset = dataset.ReplayDataset(
        stream, max_sequence_length=QTest.hparams.max_sequence_length)
    replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
    replay_op = replay_dataset.make_one_shot_iterator().get_next()

    action_ph = array_ops.placeholder(
        stream.action_dtype, [None, None] + stream.action_shape,
        name='action')
    reward_ph = array_ops.placeholder(
        stream.reward_dtype, [None, None] + stream.reward_shape,
        name='reward')
    terminal_ph = array_ops.placeholder(
        dtypes.bool, [None, None], name='terminal')
    sequence_length_ph = array_ops.placeholder(
        dtypes.int32, [None, 1], name='sequence_length')
    sequence_length = array_ops.squeeze(sequence_length_ph, -1)

    q_value_op, expected_q_value_op = q_ops.expected_q_value(
        reward_ph,
        action_ph,
        action_value_op,
        (next_action_value_op, target_next_action_value_op),
        weights=(1 - math_ops.cast(terminal_ph, reward_ph.dtype)),
        discount=QTest.hparams.discount)

    # mean_squared_error
    loss_op = math_ops.square(q_value_op - expected_q_value_op)
    loss_op = math_ops.reduce_mean(
        math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
            sequence_length, loss_op.dtype))
    optimizer = adam.AdamOptimizer(
        learning_rate=QTest.hparams.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=policy_variables)
    # Sync the target network every `assign_target_steps` steps.
    train_op = control_flow_ops.cond(
        gen_math_ops.equal(
            gen_math_ops.mod(
                (global_step + 1),
                ops.convert_to_tensor(
                    QTest.hparams.assign_target_steps, dtype=dtypes.int64)),
            0),
        lambda: control_flow_ops.group(*[train_op, assign_target_op]),
        lambda: train_op)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(assign_target_op)
      for iteration in range(QTest.hparams.num_iterations):
        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            stream=stream)

        while True:
          try:
            replay = sess.run(replay_op)
          except (errors_impl.InvalidArgumentError,
                  errors_impl.OutOfRangeError):
            break
          _, loss = sess.run(
              (train_op, loss_op),
              feed_dict={
                  state_ph: replay.state,
                  next_state_ph: replay.next_state,
                  action_ph: replay.action,
                  reward_ph: replay.reward,
                  terminal_ph: replay.terminal,
                  sequence_length_ph: replay.sequence_length,
              })

        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            deterministic=True, save_replay=False)
        print('average_rewards = {}'.format(
            rewards / QTest.hparams.num_episodes))

  @test_util.skip_if(True)
  def test_q_ops_quantile_dqn(self):
    env = gym.make('CartPole-v0')
    ops.reset_default_graph()
    np.random.seed(42)
    random_seed.set_random_seed(42)
    env.seed(42)

    # Setup the policy and model
    global_step = training_util.get_or_create_global_step()
    deterministic_ph = array_ops.placeholder(
        dtypes.bool, [], name='deterministic')
    exploration_op = learning_rate_decay.exponential_decay(
        QTest.hparams.initial_exploration, global_step,
        QTest.hparams.exploration_decay_steps,
        QTest.hparams.exploration_decay_rate)

    state_distribution, state_ph = gym_ops.distribution_from_gym_space(
        env.observation_space, name='state_space')
    action_distribution, _ = gym_ops.distribution_from_gym_space(
        env.action_space, name='action_space')

    # Setup the dataset
    stream = streams.Uniform.from_distributions(
        state_distribution, action_distribution)

    with variable_scope.variable_scope('logits'):
      action_value_op = mlp(state_ph, QTest.hparams.hidden_layers)
      action_value_op = core.dense(
          action_value_op,
          stream.action_value_shape[-1] * QTest.hparams.num_quantiles,
          use_bias=False)
      action_value_op_shape = array_ops.shape(action_value_op)
      action_value_shape = [
          action_value_op_shape[0], action_value_op_shape[1],
          stream.action_value_shape[-1], QTest.hparams.num_quantiles]
      action_value_op = gen_array_ops.reshape(action_value_op,
                                              action_value_shape)
      mean_action_value_op = math_ops.reduce_mean(action_value_op, axis=-1)
      action_op = math_ops.argmax(mean_action_value_op, axis=-1)
      action_op = array_ops.squeeze(action_op)
    policy_variables = variables.trainable_variables(scope='logits')

    next_state_ph = shortcuts.placeholder_like(
        state_ph, name='next_state_space')
    with variable_scope.variable_scope('target_logits'):
      target_next_action_value_op = mlp(next_state_ph,
                                        QTest.hparams.hidden_layers)
      target_next_action_value_op = core.dense(
          target_next_action_value_op,
          stream.action_value_shape[-1] * QTest.hparams.num_quantiles,
          use_bias=False)
      target_next_action_value_op_shape = array_ops.shape(
          target_next_action_value_op)
      target_next_action_value_shape = [
          target_next_action_value_op_shape[0],
          target_next_action_value_op_shape[1],
          stream.action_value_shape[-1], QTest.hparams.num_quantiles]
      target_next_action_value_op = gen_array_ops.reshape(
          target_next_action_value_op, target_next_action_value_shape)
      mean_target_next_action_value_op = math_ops.reduce_mean(
          target_next_action_value_op, axis=-1)
    assign_target_op = shortcuts.assign_scope('logits', 'target_logits')

    replay_dataset = dataset.ReplayDataset(
        stream, max_sequence_length=QTest.hparams.max_sequence_length)
    replay_dataset = replay_dataset.batch(QTest.hparams.batch_size)
    replay_op = replay_dataset.make_one_shot_iterator().get_next()

    action_ph = array_ops.placeholder(
        stream.action_dtype, [None, None] + stream.action_shape,
        name='action')
    reward_ph = array_ops.placeholder(
        stream.reward_dtype, [None, None] + stream.reward_shape,
        name='reward')
    terminal_ph = array_ops.placeholder(
        dtypes.bool, [None, None], name='terminal')
    sequence_length_ph = array_ops.placeholder(
        dtypes.int32, [None, 1], name='sequence_length')
    sequence_length = array_ops.squeeze(sequence_length_ph, -1)

    q_value_op, expected_q_value_op = q_ops.expected_q_value(
        array_ops.expand_dims(reward_ph, -1),
        action_ph,
        action_value_op,
        (target_next_action_value_op, mean_target_next_action_value_op),
        weights=array_ops.expand_dims(
            1 - math_ops.cast(terminal_ph, reward_ph.dtype), -1),
        discount=QTest.hparams.discount)

    # Quantile regression loss: Huber loss weighted by |tau - 1{u < 0}|.
    u = expected_q_value_op - q_value_op
    loss_op = losses_impl.huber_loss(u, delta=QTest.hparams.huber_loss_delta)
    tau_op = (2. * math_ops.range(
        0, QTest.hparams.num_quantiles, dtype=u.dtype) + 1) / (
            2. * QTest.hparams.num_quantiles)
    loss_op *= math_ops.abs(tau_op - math_ops.cast(u < 0, tau_op.dtype))
    loss_op = math_ops.reduce_mean(loss_op, axis=-1)
    loss_op = math_ops.reduce_mean(
        math_ops.reduce_sum(loss_op, axis=-1) / math_ops.cast(
            sequence_length, loss_op.dtype))
    optimizer = adam.AdamOptimizer(
        learning_rate=QTest.hparams.learning_rate)
    train_op = optimizer.minimize(loss_op, var_list=policy_variables)
    # Sync the target network every `assign_target_steps` steps.
    train_op = control_flow_ops.cond(
        gen_math_ops.equal(
            gen_math_ops.mod(
                (global_step + 1),
                ops.convert_to_tensor(
                    QTest.hparams.assign_target_steps, dtype=dtypes.int64)),
            0),
        lambda: control_flow_ops.group(*[train_op, assign_target_op]),
        lambda: train_op)

    with self.test_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(assign_target_op)
      for iteration in range(QTest.hparams.num_iterations):
        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            mean_action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            stream=stream)

        while True:
          try:
            replay = sess.run(replay_op)
          except (errors_impl.InvalidArgumentError,
                  errors_impl.OutOfRangeError):
            break
          loss, _ = sess.run(
              (loss_op, train_op),
              feed_dict={
                  state_ph: replay.state,
                  next_state_ph: replay.next_state,
                  action_ph: replay.action,
                  reward_ph: replay.reward,
                  terminal_ph: replay.terminal,
                  sequence_length_ph: replay.sequence_length,
              })

        rewards = gym_test_utils.rollout_on_gym_env(
            sess, env, state_ph, deterministic_ph,
            mean_action_value_op, action_op,
            num_episodes=QTest.hparams.num_episodes,
            deterministic=True, save_replay=False)
        print('average_rewards = {}'.format(
            rewards / QTest.hparams.num_episodes))
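# An illustrative NumPy sketch (not part of the test suite) of the
# quantile-regression loss assembled in test_q_ops_quantile_dqn above: a
# Huber loss on the TD errors u, weighted asymmetrically per quantile by
# |tau - 1{u < 0}|, then averaged over the quantile axis.
import numpy as np

def quantile_huber_loss(u, num_quantiles=51, delta=1.0):
  """u = expected_q_value - q_value, shape [..., num_quantiles]."""
  huber = np.where(
      np.abs(u) <= delta,
      0.5 * np.square(u),
      delta * (np.abs(u) - 0.5 * delta))
  tau = (2.0 * np.arange(num_quantiles) + 1.0) / (2.0 * num_quantiles)
  return np.mean(np.abs(tau - (u < 0)) * huber, axis=-1)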
import os

from tensorflow.contrib.learn.python.learn import evaluable  # pylint: disable=g-import-not-at-top
from tensorflow.contrib.learn.python.learn import experiment
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.learn.python.learn import trainable
from tensorflow.contrib.learn.python.learn.estimators import run_config as run_config_lib
from tensorflow.contrib.training.python.training import hparam as hparam_lib
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging

patch = test.mock.patch

_MODIR_DIR = "/tmp"
_HPARAMS = hparam_lib.HParams(learning_rate=0.01)
_MUST_SPECIFY_OUTPUT_DIR_MSG = "Must specify an output directory"
_MISSING_MODEL_DIR_ERR_MSG = "Must specify a model directory in `run_config`."
_EXP_NOT_CALLABLE_MSG = "Experiment builder .* is not callable"
_INVALID_HPARAMS_ERR_MSG = "`hparams` must be `HParams` instance"
_NOT_EXP_TYPE_MSG = "Experiment builder did not return an Experiment"
_NON_EXIST_TASK_MSG = "Schedule references non-existent task"
_NON_CALLABLE_MSG = "Schedule references non-callable member"
_MUST_SPECIFY_OUTPUT_DIR_OR_CONFIG_MSG = (
    "Must set value for `output_dir` or `run_config`")
_HPARAMS_CANNOT_BE_SET_FOR_OUTPUT_DIR_MSG = (
    "Must set `hparams` as None for `experiment_fn` with `output_dir`.")
_CANNOT_SET_BOTH_OUTPUT_DIR_AND_CONFIG_MSG = (
    "Cannot provide both `output_dir` and `run_config`")
_INVALID_RUN_CONFIG_TYPE_MSG = "`run_config` must be `RunConfig` instance"
_RUN_CONFIG_UID_CHECK_ERR_MSG = (
class XlaDecoratorTest(test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      ('test_use_as_decorator', decorated_model_fn, None),
      ('test_use_as_function', xla.estimator_model_fn(_test_train_model_fn),
       None),
      ('test_use_tpu_false_hparams', decorated_model_fn,
       hparam.HParams(use_tpu=False)),
      ('test_use_tpu_false_dict_params', decorated_model_fn, {
          'use_tpu': False
      }),
  )
  def test_compile(self, model_fn, params):
    """Calls model_fn and verifies it is compiled."""
    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
      loss = constant_op.constant(_EXPECTED_LOSS)
      mock_xla_compile.return_value = [loss]

      features, labels = make_dummy_features_labels()
      estimator_spec = model_fn(
          features=features, labels=labels, mode=_TRAIN, params=params or {})

      mock_xla_compile.assert_called_once()
      self.assertEqual(estimator_spec.mode, _TRAIN)

      with self.test_session() as sess:
        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))

  @parameterized.named_parameters(
      ('test_use_tpu_true_hparams', decorated_model_fn,
       hparam.HParams(use_tpu=True)),
      ('test_use_tpu_true_dict_params', decorated_model_fn, {
          'use_tpu': True
      }),
  )
  def test_not_compile(self, model_fn, params):
    """Calls model_fn and verifies it is NOT compiled."""
    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
      loss = constant_op.constant(_EXPECTED_LOSS)
      mock_xla_compile.return_value = [loss]

      features, labels = make_dummy_features_labels()
      estimator_spec = model_fn(
          features=features, labels=labels, mode=_TRAIN, params=params or {})

      mock_xla_compile.assert_not_called()
      self.assertEqual(estimator_spec.mode, _TRAIN)

      with self.test_session() as sess:
        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))

  def test_model_with_summary(self):
    """Tests that summary ops are disabled."""

    @xla.estimator_model_fn
    def model_fn_with_summary(features, labels, mode, params):
      del features, labels, params
      loss = constant_op.constant(_EXPECTED_LOSS)
      summary.scalar('loss_scalar_summary', loss)
      summary.histogram('loss_histogram_summary', loss)
      summary.image('loss_image_summary', loss)
      return model_fn_lib.EstimatorSpec(
          mode=mode, loss=loss, train_op=array_ops.identity(loss))

    features, labels = make_dummy_features_labels()
    estimator_spec = model_fn_with_summary(
        features=features, labels=labels, mode=_TRAIN, params={})

    with self.test_session() as sess:
      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
    default=5)
parser.add_argument(
    '--agent',
    help='type of agent, one of [DDPG|TD3|C2A2]',
    default='DDPG')
parser.add_argument(
    '--job-dir',
    help='dir to save logs and videos',
    default='./results')
parser.add_argument(
    '--record-video',
    help='whether to record video when testing',
    action='store_true')
parser.add_argument(
    '--verbosity',
    choices=['DEBUG', 'ERROR', 'FATAL', 'INFO', 'WARN'],
    default='INFO')
args, _ = parser.parse_known_args()

# Set python level verbosity
tf.logging.set_verbosity(args.verbosity)
# Set C++ Graph Execution level verbosity (integer division, so the value
# renders as an int string)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(
    tf.logging.__dict__[args.verbosity] // 10)

for k, v in args.__dict__.items():
  tf.logging.info('{}: {}'.format(k, v))

config = hparam.HParams(**args.__dict__)
train(config)