def _set_meta_data_(self):
    """Record the embedding width and, when possible, the vocabulary size.

    `embedding_dimensions` is always taken from the length of the first
    vector. `vocab_size_meta` and the `_meta_set_` flag are only written when
    `mini_dataset_set` is truthy; otherwise a warning is logged.
    """
    first_vector = self.vectors[0]
    self.embedding_dimensions = len(first_vector)

    if not self.mini_dataset_set:
        logging.warning('Metadata was not set. Make mini dataset')
        return

    self.vocab_size_meta = len(self.vocab)
    self._meta_set_ = True
def _format_tensor(tensor, layer):
    """Reformats the tensor from Python-style array to C-style array.

    Args:
      tensor: object exposing `flatten()` that returns an iterable of numbers
        (e.g. a numpy array).
      layer: layer index, used only in the warning message.

    Returns:
      A string of the form `{v0f, v1f, ...,}` where every value is rendered
      with `%f` followed by an `f` suffix and a trailing comma.
    """
    flattened_tensor = tensor.flatten()
    # Test the elements themselves: a zero *sum* does not imply a zero tensor
    # (e.g. [1, -1]), which made the previous check warn spuriously.
    if not any(flattened_tensor):
        logging.warning("Tensor at layer %d is a zero tensor!", layer)
    parsed_tensor = " ".join("%ff," % value for value in flattened_tensor)
    return "{" + parsed_tensor + "}"
def add_single_ground_truth_image_info(self, image_key,
                                       groundtruth_box_tuples,
                                       groundtruth_class_tuples):
    """Registers the groundtruth of one image for later evaluation.

    An image key can only be registered once; a repeated key is reported with
    a warning and the call leaves the stored groundtruth untouched.

    Args:
      image_key: A unique string/integer identifier for the image.
      groundtruth_box_tuples: A numpy array of structures with the shape
        [M, 1], representing M tuples, each tuple containing the same number
        of named bounding boxes. Each box is of the format
        [y_min, x_min, y_max, x_max].
      groundtruth_class_tuples: A numpy array of structures shape [M, 1],
        representing the class labels of the corresponding bounding boxes and
        possibly additional classes.
    """
    already_known = image_key in self._groundtruth_box_tuples
    if not already_known:
        self._groundtruth_box_tuples[image_key] = groundtruth_box_tuples
        self._groundtruth_class_tuples[image_key] = groundtruth_class_tuples
        self._update_groundtruth_statistics(groundtruth_class_tuples)
    else:
        logging.warning(
            'image %s has already been added to the ground truth database.',
            image_key)
def get_variables_available_in_checkpoint(variables,
                                          checkpoint_path,
                                          include_global_step=True):
    """Filters `variables` down to those that can be restored from a checkpoint.

    A variable is kept only when the checkpoint has an entry under the same
    name whose shape equals the model variable's shape. Shape mismatches are
    logged as warnings; variables missing from the checkpoint are dropped
    without a message.

    TODO(rathodv): force input and output to be a dictionary.

    Args:
      variables: a list or dictionary of variables to find in checkpoint.
      checkpoint_path: path to the checkpoint to restore variables from.
      include_global_step: whether to include `global_step` variable, if it
        exists. Default True.

    Returns:
      A list of variables when `variables` is a list, otherwise a dictionary
      mapping variable name to variable.

    Raises:
      ValueError: if `variables` is not a list or dict.
    """
    wants_list = isinstance(variables, list)
    if wants_list:
        variable_names_map = {}
        for candidate in variables:
            partitioned = isinstance(candidate,
                                     tf_variables.PartitionedVariable)
            key = candidate.name if partitioned else candidate.op.name
            variable_names_map[key] = candidate
    elif isinstance(variables, dict):
        variable_names_map = variables
    else:
        raise ValueError('`variables` is expected to be a list or dict.')

    reader = tf.train.NewCheckpointReader(checkpoint_path)
    ckpt_shapes = reader.get_variable_to_shape_map()
    if not include_global_step:
        ckpt_shapes.pop(tf.GraphKeys.GLOBAL_STEP, None)

    restorable = {}
    # Names are unique dict keys, so ordering by name matches ordering by item.
    for variable_name in sorted(variable_names_map):
        variable = variable_names_map[variable_name]
        if variable_name not in ckpt_shapes:
            # Variables absent from the checkpoint are skipped silently.
            continue
        ckpt_shape = ckpt_shapes[variable_name]
        model_shape = variable.shape.as_list()
        shapes_match = ckpt_shape == model_shape
        if shapes_match:
            restorable[variable_name] = variable
            continue
        logging.warning(
            'Variable [%s] is available in checkpoint, but has an '
            'incompatible shape with model variable. Checkpoint '
            'shape: [%s], model variable shape: [%s]. This '
            'variable will not be initialized from the checkpoint.',
            variable_name, ckpt_shape, model_shape)

    if wants_list:
        return list(restorable.values())
    return restorable
def evaluate(self):
    """Computes evaluation result.

    Per-relationship average precisions are stored in
    `self._average_precisions`, and their mean in
    `self._mean_average_precision`; the remaining metrics are computed over
    all accumulated detections.

    Returns:
      A `VRDDetectionEvalMetrics` built positionally from, in order: the
      weighted average precision, the mean average precision, the
      per-relationship average precisions, the precisions array, the recalls
      array, recall@50, recall@100, median_rank@50 and median_rank@100.
    """
    if self._num_gt_instances == 0:
        logging.warning('No ground truth instances')

    if not self._scores:
        scores = np.array([], dtype=float)
        tp_fp_labels = np.array([], dtype=bool)
        # Must exist even without detections: the per-relationship loop below
        # indexes with it, and previously raised NameError when ground truth
        # had been added but no detection had. Object dtype keeps `==` against
        # any relationship value elementwise (an empty boolean mask).
        relation_field_values = np.array([], dtype=object)
    else:
        scores = np.concatenate(self._scores)
        tp_fp_labels = np.concatenate(self._tp_fp_labels)
        relation_field_values = np.concatenate(self._relation_field_values)

    for relation_field_value, num_gt in six.iteritems(
            self._num_gt_instances_per_relationship):
        selector = relation_field_values == relation_field_value
        precisions, recalls = metrics.compute_precision_recall(
            scores[selector], tp_fp_labels[selector], num_gt)
        self._average_precisions[relation_field_value] = (
            metrics.compute_average_precision(precisions, recalls))

    self._mean_average_precision = np.mean(
        list(self._average_precisions.values()))

    self._precisions, self._recalls = metrics.compute_precision_recall(
        scores, tp_fp_labels, self._num_gt_instances)
    self._weighted_average_precision = metrics.compute_average_precision(
        self._precisions, self._recalls)

    # The @k metrics receive the per-image list, not the concatenated array.
    self._recall_50 = metrics.compute_recall_at_k(
        self._tp_fp_labels, self._num_gt_instances, 50)
    self._median_rank_50 = metrics.compute_median_rank_at_k(
        self._tp_fp_labels, 50)
    self._recall_100 = metrics.compute_recall_at_k(
        self._tp_fp_labels, self._num_gt_instances, 100)
    self._median_rank_100 = metrics.compute_median_rank_at_k(
        self._tp_fp_labels, 100)

    return VRDDetectionEvalMetrics(
        self._weighted_average_precision, self._mean_average_precision,
        self._average_precisions, self._precisions, self._recalls,
        self._recall_50, self._recall_100, self._median_rank_50,
        self._median_rank_100)
def remove_training_directory(train_dir, task):
    """Removes the training directory.

    Deletion is best-effort: a failure is logged as an error and not
    re-raised.

    Args:
      train_dir: path of the directory to delete recursively.
      task: task descriptor, passed to `task_as_string` for log prefixes.
    """
    try:
        logging.warning("%s: Removing existing train directory.",
                        task_as_string(task))
        gfile.DeleteRecursively(train_dir)
    except Exception:  # pylint: disable=broad-except
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still propagate. The directory is passed as a log
        # argument so a '%' in the path cannot corrupt the format string.
        logging.error(
            "%s: Failed to delete directory %s when starting a new model. "
            "Please delete it manually and try again.",
            task_as_string(task), train_dir)
def add_single_detected_image_info(self, image_id, detections_dict):
    """Filters the detections of one image and forwards them for evaluation.

    A detection is forwarded when every one of its box-field labels is in the
    image's evaluatable labels, or when at least one of them is in the image's
    negative labels.

    Args:
      image_id: A unique string/integer identifier for the image.
      detections_dict: A dictionary containing -
        standard_fields.DetectionResultFields.detection_boxes: A numpy array
          of structures with shape [N, 1], representing N tuples, each tuple
          containing the same number of named bounding boxes. Each box is of
          the format [y_min, x_min, y_max, x_max] (as an example see datatype
          vrd_box_data_type, single_box_data_type above).
        standard_fields.DetectionResultFields.detection_scores: float32 numpy
          array of shape [N] containing detection scores for the boxes.
        standard_fields.DetectionResultFields.detection_classes: A numpy array
          of structures shape [N, 1], representing the class labels of the
          corresponding bounding boxes and possibly additional classes (see
          datatype label_data_type above).
    """
    result_fields = standard_fields.DetectionResultFields

    if image_id not in self._image_ids:
        logging.warning('No groundtruth for the image with id %s.', image_id)
        # Groundtruth is expected to be inserted first. When it was not, the
        # image is registered here with empty negative/evaluatable label sets.
        self._image_ids.update([image_id])
        self._negative_labels[image_id] = np.array([])
        self._evaluatable_labels[image_id] = np.array([])

    detection_box_tuples = detections_dict[result_fields.detection_boxes]
    num_detections = detection_box_tuples.shape[0]
    detection_class_tuples = detections_dict[result_fields.detection_classes]

    has_negative_label = np.zeros(num_detections, dtype=bool)
    all_labels_verified = np.ones(num_detections, dtype=bool)
    # Only check boxable labels, i.e. the fields that also name a box.
    for field_name in detection_box_tuples.dtype.fields:
        field_classes = detection_class_tuples[field_name]
        # A negative label on any field makes the detection a sure FP.
        has_negative_label |= np.isin(field_classes,
                                      self._negative_labels[image_id])
        # Every field's label has to be verified for the image.
        all_labels_verified &= np.isin(field_classes,
                                       self._evaluatable_labels[image_id])
    all_labels_verified |= has_negative_label
    keep = all_labels_verified

    kept_boxes = self._process_detection_boxes(detection_box_tuples[keep])
    kept_scores = detections_dict[result_fields.detection_scores][keep]
    self._evaluation.add_single_detected_image_info(
        image_key=image_id,
        detected_box_tuples=kept_boxes,
        detected_scores=kept_scores,
        detected_class_tuples=detection_class_tuples[keep])
def predict(model_name,
            model=None,
            row_start=None,
            row_end=None,
            custom_objects=None):
    """Runs a model over the prediction dataset and returns its outputs.

    Missing entries of the module-level `config` are filled in with their
    `default_*` values first. When no model is given, it is loaded by name
    (compiled), using `custom_metrics` if `custom_objects` is None.

    Args:
      model_name: name used to load the model and look up its rolling window
        size.
      model: an already loaded model, or None to load it by name.
      row_start: forwarded to the data generator.
      row_end: forwarded to the data generator.
      custom_objects: custom objects for model loading.

    Returns:
      The result of `model.predict_generator`.

    Raises:
      NoTrainedModelException: if the model could not be loaded.
    """
    config.setdefault('batch_size', default_batch_size)
    config.setdefault('max_queue_size', default_max_queue_size)
    config.setdefault('does_use_multiprocessing',
                      default_does_use_multiprocessing)
    config.setdefault('worker_number', default_worker_number)
    config.setdefault('verbose', default_verbose)

    if model is None:
        objects_for_loading = (custom_metrics
                               if custom_objects is None else custom_objects)
        model = load_model(model_name=model_name,
                           custom_objects=objects_for_loading,
                           does_compile=True)
        if model is None:
            raise NoTrainedModelException(model_name)

    batch_size = config['batch_size']
    generator = DataGenerator(
        dataset_name=DATASET_NAME_PREDICT,
        rolling_window_size=get_rolling_window_size(model_name),
        row_start=row_start,
        row_end=row_end,
        max_batch_size=batch_size,
        does_shuffle=False,  # predictions must keep the input order
    )

    samples_per_row = generator.get_sample_number_per_row()
    if batch_size % samples_per_row != 0:
        logging.warning(
            'predict: batch_size(%d) cannot divide %d. '
            'Some inputs will be ignored.', batch_size, samples_per_row)

    return model.predict_generator(
        generator=generator,
        max_queue_size=config['max_queue_size'],
        use_multiprocessing=config['does_use_multiprocessing'],
        workers=config['worker_number'],
        verbose=config['verbose'],
    )
def next_batch(self):
    """Takes the next Batch off the batch queue.

    If mode='infer' then each batch contains a single example repeated
    beam_size-many times; this is necessary for beam search.

    Returns:
      batch: a Batch object, or None if we're in single_pass mode and we've
        exhausted the dataset.
    """
    queue_is_empty = self._batch_queue.qsize() == 0
    if queue_is_empty:
        # An empty queue is worth a warning; it is only final in single_pass
        # mode once reading has finished.
        log.warning(
            'Bucket input queue is empty when calling next_batch. Bucket queue size: %i, Input queue size: %i',
            self._batch_queue.qsize(), self._example_queue.qsize())
        dataset_exhausted = self._single_pass and self._finished_reading
        if dataset_exhausted:
            log.info("Finished reading dataset in single_pass mode.")
            return None

    return self._batch_queue.get()
def get_tfrecord_files(train_or_eval_files, num_workers=1):
    """Selects the dataset files that the current worker should read.

    The file names are joined onto `FLAGS.dataset_dir`, sorted, and dealt out
    round-robin: the worker with index `FLAGS.task_index` receives every file
    whose position modulo `num_workers` equals that index.

    Args:
      train_or_eval_files: comma-separated string of file names relative to
        `FLAGS.dataset_dir`.
      num_workers: int, number of workers the files are split between.

    Returns:
      A list of file paths for this worker. Always empty when
      `FLAGS.dataset_name` is 'mock'.

    Raises:
      ValueError: if `FLAGS.dataset_dir` is not set, if `num_workers` is
        smaller than 1, or if there are fewer files than workers.
    """
    if FLAGS.dataset_name == 'mock':
        return []

    dataset_dir = FLAGS.dataset_dir
    if dataset_dir is None:
        raise ValueError('Need to specify dataset, mock or real.')
    assert train_or_eval_files is not None

    # Reject this up front; num_workers == 0 used to surface as a
    # ZeroDivisionError from the checks below.
    if num_workers < 1:
        raise ValueError(
            'Require num_workers >= 1, got {}.'.format(num_workers))

    all_tfrecord_files = [
        os.path.join(dataset_dir, file_name)
        for file_name in train_or_eval_files.split(',')
    ]
    num_files = len(all_tfrecord_files)
    if (num_files // num_workers) <= 0:
        raise ValueError(
            'Require num_training_files_per_worker > 0 with '
            'num_training_files({}) < num_workers({}).'.format(
                num_files, num_workers))
    if num_files % num_workers > 0:
        logging.warning(
            "%d files can not be distributed equally between %d workers.",
            num_files, num_workers)

    all_tfrecord_files.sort()
    # A modulo test rather than a slice: a task_index outside
    # [0, num_workers) must keep yielding an empty list.
    ret = [
        path for position, path in enumerate(all_tfrecord_files)
        if position % num_workers == FLAGS.task_index
    ]
    logging.info('Worker Host %s handles %d files including %s.',
                 FLAGS.task_index, len(ret), ret)
    return ret
def get_vars_available_in_ckpt(name_to_var_map,
                               checkpoint_path,
                               include_global_step=True):
    """Selects the variables that can be restored from a checkpoint.

    A variable is kept when the checkpoint contains an entry under the same
    name with an identical shape. Every variable that is dropped, either
    because it is missing or because its shape differs, is reported with a
    warning. The result is suitable for initializing a `tf.train.Saver`.

    Args:
      name_to_var_map: a dict mapping from variable name to variable.
      checkpoint_path: string scalar, path to the checkpoint to restore
        variables from.
      include_global_step: bool scalar, whether to include `global_step`
        variable, if exists. Defaults to True.

    Returns:
      vars_in_ckpt: a dict mapping from variable name to variable.
    """
    ckpt_shapes = tf.train.NewCheckpointReader(
        checkpoint_path).get_variable_to_shape_map()
    if not include_global_step:
        ckpt_shapes.pop(tf.GraphKeys.GLOBAL_STEP, None)

    restorable = {}
    # Names are unique dict keys, so ordering by name matches ordering by item.
    for var_name in sorted(name_to_var_map):
        var = name_to_var_map[var_name]
        if var_name not in ckpt_shapes:
            logging.warning('Variable [%s] is not available in checkpoint',
                            var_name)
            continue
        shapes_match = ckpt_shapes[var_name] == var.shape.as_list()
        if not shapes_match:
            logging.warning(
                'Variable [%s] is available in checkpoint, but has an '
                'incompatible shape with model variable.', var_name)
            continue
        restorable[var_name] = var
    return restorable
def text_generator(self, example_generator):
    """Generates article and abstract text from tf.Example.

    Examples whose article or abstract cannot be read are logged as errors and
    skipped; examples with an empty article are logged as warnings and
    skipped. The generator ends when `example_generator` is exhausted.

    Args:
      example_generator: a generator of tf.Examples from file. See
        data.example_generator

    Yields:
      (article_text, abstract_text) tuples of decoded strings.
    """
    # Iterate with `for` instead of `while True: next(...)`: under PEP 479
    # (Python 3.7+) a StopIteration escaping from next() inside a generator is
    # turned into RuntimeError rather than ending the generator.
    for e in example_generator:  # e is a tf.Example
        try:
            features = e.features.feature
            # the article text was saved under the key 'article' in the data
            # files, the abstract text under the key 'abstract'
            article_text = features['article'].bytes_list.value[0].decode()
            abstract_text = features['abstract'].bytes_list.value[0].decode()
        except (ValueError, IndexError):
            # IndexError covers a feature with no values at all;
            # UnicodeDecodeError is already a ValueError subclass.
            log.error('Failed to get article or abstract from example')
            continue
        if not article_text:
            # See https://github.com/abisee/pointer-generator/issues/1
            log.warning(
                'Found an example with empty article text. Skipping it.')
            continue
        yield (article_text, abstract_text)
def get_meta_filename(start_new_model, train_dir, task):
    """Locates the meta graph file of the latest checkpoint, if it is usable.

    Args:
      start_new_model: when truthy, no lookup is done and None is returned.
      train_dir: directory searched for the latest checkpoint.
      task: task descriptor, passed to `task_as_string` for log prefixes.

    Returns:
      The path `<latest checkpoint>.meta` when that file exists; None when a
      new model was requested, no checkpoint was found, or the meta file is
      missing. Each None case is logged as a warning.
    """
    if start_new_model:
        logging.warning(
            "%s: Flag 'start_new_model' is set. Building a new model.",
            task_as_string(task))
        return None

    checkpoint_prefix = tf.train.latest_checkpoint(train_dir)
    if not checkpoint_prefix:
        logging.warning("%s: No checkpoint file found. Building a new model.",
                        task_as_string(task))
        return None

    candidate = checkpoint_prefix + ".meta"
    if gfile.Exists(candidate):
        return candidate

    logging.warning("%s: No meta graph file found. Building a new model.",
                    task_as_string(task))
    return None
def recover_model(task, meta_filename):
    """Imports the graph stored in a meta graph file.

    Args:
      task: task descriptor, passed to `task_as_string` for the log prefix.
      meta_filename: path of the meta graph file to import.

    Returns:
      The value returned by `tf.train.import_meta_graph` for that file.
    """
    task_label = task_as_string(task)
    logging.warning("%s: Restoring from meta graph file %s", task_label,
                    meta_filename)
    restored = tf.train.import_meta_graph(meta_filename)
    return restored
def main(unused_argv):
    """Evaluates a saved model for a fixed number of episodes and submits it.

    The cluster/task layout is read from the TF_CONFIG environment variable.
    Masters, workers and non-clustered runs restore the meta graph found in
    `FLAGS.train_dir`, play `total_episodes` episodes against
    `env_wrapper.Service()`, write the total and average reward as summaries,
    and finally call `env.submit` with the credentials from `conf`.

    Args:
      unused_argv: ignored.

    Raises:
      RuntimeError: if no meta graph file is available in `FLAGS.train_dir`.
    """
    env = json.loads(os.environ.get("TF_CONFIG", "{}"))
    cluster_data = env.get("cluster", None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
    task_data = env.get("task", None) or {"type": "master", "index": 0}
    task = type("TaskSpec", (object, ), task_data)
    is_master = (task.type == "master" and task.index == 0)
    train_dir = FLAGS.train_dir
    D = 4  # input dimensionality

    if cluster:
        logging.warning("%s: Starting trainer within cluster %s.",
                        task_as_string(task), cluster.as_dict())
        server = start_server(cluster, task)
        target = server.target
        device_fn = tf.train.replica_device_setter(
            ps_device="/job:ps",
            worker_device="/job:%s/task:%d" % (task.type, task.index),
            cluster=cluster)
    else:
        target = ""
        device_fn = ""

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    if not cluster or task.type == "master" or task.type == "worker":
        model = find_class_by_name(FLAGS.model, [models])()

        with tf.Graph().as_default() as graph:
            meta_filename = get_meta_filename(False, train_dir, task)
            if meta_filename:
                logging.warning("using saved model %s", meta_filename)
                saver = recover_model(task, meta_filename)
            else:
                # Raising a plain string is itself a TypeError; raise a real
                # exception so the actual cause is reported.
                raise RuntimeError("meta file not found")

            with tf.device(device_fn):
                init = tf.global_variables_initializer()
                global_step = tf.get_collection("global_step")[0]
                model.get_collection(global_step)

            summary_writer = tf.summary.FileWriter(
                FLAGS.train_dir, graph=tf.get_default_graph())

        sv = tf.train.Supervisor(graph,
                                 logdir=train_dir,
                                 init_op=init,
                                 is_chief=is_master,
                                 global_step=global_step,
                                 save_model_secs=3600,
                                 save_summaries_secs=120,
                                 saver=saver)

        # Launch the graph
        drs = []
        running_reward_sum = 0
        reward_sum = 0
        episode_number = 0
        total_episodes = 10

        logging.warning("%s: Starting managed session.", task_as_string(task))
        with sv.managed_session(target, config=config) as sess:
            env = env_wrapper.Service()
            # Obtain an initial observation of the environment
            observation = env.reset()
            model.before(sess)

            while episode_number < total_episodes:
                # Make sure the observation is in a shape the network can
                # handle.
                x = np.reshape(observation, [1, D])
                action = model.get_action(sess, x)

                # step the environment and get new measurements
                observation, reward, done, info = env.step(action)
                reward_sum += reward
                running_reward_sum += reward
                # record reward (has to be done after we call step() to get
                # reward for previous action)
                drs.append(reward)

                if done:
                    episode_number += 1
                    # Report against the real episode budget; the message used
                    # to hard-code "of 200".
                    logging.info(
                        'Reward for episode %d of %d = %f. '
                        'Total average reward %f.', episode_number,
                        total_episodes, reward_sum,
                        running_reward_sum / episode_number)
                    reward_sum = 0
                    observation = env.reset()
                    done = False

            # Single-argument call form prints the same under Python 2 and 3.
            print("")
            logging.info('Total reward: %d, Avg reward: %f',
                         running_reward_sum,
                         running_reward_sum / total_episodes)
            avg_reward_sum = running_reward_sum / total_episodes

            # NOTE(review): the reward values are also passed as the summary
            # step argument; kept as-is, confirm this is intended.
            summary_writer.add_summary(
                MakeSummary("GlobalStep/Eval_TotalRewardSum",
                            running_reward_sum), running_reward_sum)
            summary_writer.add_summary(
                MakeSummary("GlobalStep/Eval_AvgRewardSum", avg_reward_sum),
                avg_reward_sum)
            summary_writer.flush()

            results = env.submit(conf.kaggle_user, conf.kaggle_passwd)
            print(results)
def _open_volume_map(volume_specs):
    """Opens every `name:path:dataset` entry of a comma-separated spec."""
    volume_map = {}
    for vol in volume_specs.split(','):
        volname, path, dataset = vol.split(':')
        # Explicit read-only mode: nothing here writes to the volumes, and
        # older h5py versions default to a mode that can create or modify
        # files.
        volume_map[volname] = h5py.File(path, 'r')[dataset]
    return volume_map


def define_data_input(model, queue_batch=None):
    """Adds TF ops to load input data.

    Args:
      model: model object from which the label and image patch sizes are
        derived via `train_labels_size` / `train_image_size`.
      queue_batch: batch size of the shuffle queue; defaults to
        `FLAGS.batch_size` when None.

    Returns:
      A tuple `(patches, labels, loss_weights, coord, volname)`.

    Raises:
      ValueError: if neither `--image_mean` and `--image_stddev` nor
        `--image_offset_scale_map` are defined.
    """
    label_volume_map = _open_volume_map(FLAGS.label_volumes)
    image_volume_map = _open_volume_map(FLAGS.data_volumes)

    if queue_batch is None:
        queue_batch = FLAGS.batch_size

    # Fetch sizes of images and labels
    label_size = train_labels_size(model)
    image_size = train_image_size(model)

    label_radii = (label_size // 2).tolist()
    label_size = label_size.tolist()
    image_radii = (image_size // 2).tolist()
    image_size = image_size.tolist()

    # Fetch a single coordinate and volume name from a queue reading the
    # coordinate files or from saved hard/important examples
    import os.path
    if os.path.isfile(FLAGS.train_coords):
        logging.info('{} exists.'.format(FLAGS.train_coords))
    else:
        logging.error('{} does not exist.'.format(FLAGS.train_coords))

    if FLAGS.sharding_rule == 0:
        coord, volname = inputs.load_patch_coordinates(FLAGS.train_coords)
    elif FLAGS.sharding_rule == 1 and 'horovod' in sys.modules:
        # Each Horovod rank reads its own shard of the coordinate records.
        d = tf.data.TFRecordDataset(FLAGS.train_coords,
                                    compression_type='GZIP')
        d = d.shard(hvd.size(), hvd.rank())
        d = d.map(parser_fn)
        iterator = d.make_one_shot_iterator()
        coord, volname = iterator.get_next()
    else:
        logging.warning("You need to install Horovod to use sharding. Turning sharding off..")
        FLAGS.sharding_rule = 0
        coord, volname = inputs.load_patch_coordinates(FLAGS.train_coords)

    # Load object labels (segmentation).
    labels = inputs.load_from_numpylike(
        coord, volname, label_size, label_volume_map)

    label_shape = [1] + label_size[::-1] + [1]
    labels = tf.reshape(labels, label_shape)

    loss_weights = tf.constant(np.ones(label_shape, dtype=np.float32))

    # Load image data.
    patch = inputs.load_from_numpylike(
        coord, volname, image_size, image_volume_map)
    data_shape = [1] + image_size[::-1] + [1]
    patch = tf.reshape(patch, shape=data_shape)

    if ((FLAGS.image_stddev is None or FLAGS.image_mean is None) and
            not FLAGS.image_offset_scale_map):
        raise ValueError('--image_mean, --image_stddev or --image_offset_scale_map '
                         'need to be defined')

    # Convert segmentation into a soft object mask: keep the voxels that share
    # the (non-zero) label found at the centre of the label patch.
    lom = tf.logical_and(
        labels > 0,
        tf.equal(labels, labels[0,
                                label_radii[2],
                                label_radii[1],
                                label_radii[0],
                                0]))
    labels = inputs.soften_labels(lom)

    # Apply basic augmentations. The same transform object is applied to the
    # labels, the image patch and the loss weights.
    transform_axes = augmentation.PermuteAndReflect(
        rank=5, permutable_axes=_get_permutable_axes(),
        reflectable_axes=_get_reflectable_axes())
    labels = transform_axes(labels)
    patch = transform_axes(patch)
    loss_weights = transform_axes(loss_weights)

    # Normalize image data.
    patch = inputs.offset_and_scale_patches(
        patch, volname[0],
        offset_scale_map=_get_offset_and_scale_map(),
        default_offset=FLAGS.image_mean,
        default_scale=FLAGS.image_stddev)

    # Create a batch of examples. Note that any TF operation before this line
    # will be hidden behind a queue, so expensive/slow ops can take advantage
    # of multithreading.
    # MK TODO: check num_threads usage here
    patches, labels, loss_weights = tf.train.shuffle_batch(
        [patch, labels, loss_weights], queue_batch,
        num_threads=max(1, FLAGS.batch_size // 2),
        capacity=32 * FLAGS.batch_size,
        min_after_dequeue=4 * FLAGS.batch_size,
        enqueue_many=True)

    return patches, labels, loss_weights, coord, volname
def __init__(self,
             num_units,
             num_dims=1,
             input_dims=None,
             output_dims=None,
             priority_dims=None,
             non_recurrent_dims=None,
             tied=False,
             cell_fn=None,
             non_recurrent_fn=None,
             state_is_tuple=True,
             output_is_tuple=True):
    """Sets up the configuration and inner cells of a Grid RNN cell.

    Args:
      num_units: int, The number of units in all dimensions of this GridRNN
        cell
      num_dims: int, Number of dimensions of this grid.
      input_dims: int or list, List of dimensions which will receive input
        data.
      output_dims: int or list, List of dimensions from which the output will
        be recorded.
      priority_dims: int or list, List of dimensions to be considered as
        priority dimensions. If None, no dimension is prioritized.
      non_recurrent_dims: int or list, List of dimensions that are not
        recurrent. The transfer function for non-recurrent dimensions is
        specified via `non_recurrent_fn`.
      tied: bool, Whether to share the weights among the dimensions of this
        GridRNN cell. When True, a single inner cell object is created and
        reused for every dimension.
      cell_fn: function, a function which returns the recurrent cell object.
        It is called as `cell_fn(num_units)` and has to return an object of
        type `RNNCell`. If None, `LSTMCell` is used with `num_units` and
        `state_is_tuple`. Note that if you use a custom RNNCell (with
        `cell_fn`), it is your responsibility to make sure the inner cell use
        `state_is_tuple=True`.
      non_recurrent_fn: a tensorflow Op that will be the transfer function of
        the non-recurrent dimensions. Falls back to `nn.selu` when not given.
      state_is_tuple: If True, accepted and returned states are tuples of the
        states of the recurrent dimensions. If False, they are concatenated
        along the column axis. The latter behavior will soon be deprecated.
      output_is_tuple: If True, the output is a tuple of the outputs of the
        recurrent dimensions. If False, they are concatenated along the
        column axis. The latter behavior will soon be deprecated.

    Raises:
      ValueError: if `num_dims` is smaller than 1.
      TypeError: if cell_fn does not return an RNNCell instance.
    """
    if not state_is_tuple:
        logging.warning('%s: Using a concatenated state is slower and will '
                        'soon be deprecated.  Use state_is_tuple=True.', self)
    if not output_is_tuple:
        logging.warning('%s: Using a concatenated output is slower and will '
                        'soon be deprecated.  Use output_is_tuple=True.', self)

    if num_dims < 1:
        raise ValueError('dims must be >= 1: {}'.format(num_dims))

    transfer_fn = non_recurrent_fn or nn.selu
    self._config = _parse_rnn_config(num_dims, input_dims, output_dims,
                                     priority_dims, non_recurrent_dims,
                                     transfer_fn, tied, num_units)
    self._state_is_tuple = state_is_tuple
    self._output_is_tuple = output_is_tuple

    if cell_fn is None:
        def make_cell():
            return rnn.LSTMCell(num_units=num_units,
                                state_is_tuple=state_is_tuple)
    else:
        def make_cell():
            return cell_fn(num_units)

    if tied:
        # One cell object shared by all dimensions.
        shared_cell = make_cell()
        self._cells = [shared_cell] * num_dims
    else:
        self._cells = [make_cell() for _ in range(num_dims)]

    first_cell = self._cells[0]
    if not isinstance(first_cell, rnn.RNNCell):
        raise TypeError('cell_fn must return an RNNCell instance, saw: %s' %
                        type(first_cell))

    num_outputs = len(self._config.outputs)
    if self._output_is_tuple:
        self._output_size = tuple(
            first_cell.output_size for _ in range(num_outputs))
    else:
        self._output_size = first_cell.output_size * num_outputs

    num_recurrents = len(self._config.recurrents)
    if self._state_is_tuple:
        self._state_size = tuple(
            first_cell.state_size for _ in range(num_recurrents))
    else:
        self._state_size = self._cell_state_size() * num_recurrents
def _parse_callback_spec(spec):
    """Splits a `name:key=value,flag` callback string into (name, params)."""
    cleaned = re.sub(pattern=_remove_pattern, repl='', string=spec.lower())
    cb_name, has_params, raw_params = cleaned.partition(':')
    cb_params = dict()
    if has_params:
        for raw_param in raw_params.split(','):
            key, has_value, value = raw_param.partition('=')
            # A parameter without '=' is a flag and is stored as '1'.
            cb_params[key] = value if has_value else '1'
    return cb_name, cb_params


def _resolve_baseline(cb_params, model_name, monitor):
    """Returns the explicit `baseline` parameter or the stored best value."""
    if 'baseline' in cb_params:
        return float(cb_params['baseline'])
    _, baseline = load_best_info(model_name=model_name, monitor_name=monitor)
    return baseline


def train_model(model_name,
                model,
                row_start=None,
                row_end=None,
                step=None,
                initial_epoch=0,
                end_epoch=1,
                time_limit=None):
    """Trains `model` on the training dataset with the configured callbacks.

    Missing entries of the module-level `config` are filled in with their
    `default_*` values. Entries of `config['callbacks']` may be callback
    instances or strings of the form `name` or `name:key=value,flag`, with the
    names earlystopping, tensorboard, modelsaver, epochnumbersaver,
    bestinfosaver and timelimiter. Callback strings are lower-cased before
    parsing.

    Args:
      model_name: name used for stored model information and logs.
      model: the model to train.
      row_start: forwarded to the data generator.
      row_end: forwarded to the data generator.
      step: forwarded to the data generator.
      initial_epoch: epoch at which to start training.
      end_epoch: epoch at which training stops; must exceed `initial_epoch`.
      time_limit: if not None, a `TimeLimiter` with this limit is added and
        any TimeLimiter found in the config is ignored.

    Returns:
      The history returned by `model.fit_generator`, or None when
      `initial_epoch >= end_epoch`.

    Raises:
      ValueError: if a timelimiter callback string lacks the `limit`
        parameter.
      UnknownCallbackNameException: if a callback string names an unknown
        callback.
      TypeError: if a callback is neither a Callback instance nor a string.
    """
    if initial_epoch >= end_epoch:
        # The placeholders used to be logged verbatim because no arguments
        # were supplied.
        logging.error('initial_epoch(%d) >= end_epoch(%d).', initial_epoch,
                      end_epoch)
        return None

    config.setdefault('batch_size', default_batch_size)
    config.setdefault('does_shuffle', default_does_shuffle)
    config.setdefault('callbacks', default_callbacks)
    config.setdefault('monitored_loss_name', default_monitored_loss_name)
    config.setdefault('max_queue_size', default_max_queue_size)
    config.setdefault('does_use_multiprocessing',
                      default_does_use_multiprocessing)
    config.setdefault('worker_number', default_worker_number)
    config.setdefault('verbose', default_verbose)

    callbacks = list() if config['callbacks'] is not None else None
    if callbacks is not None:
        for cb in config['callbacks']:
            if isinstance(cb, keras.callbacks.Callback):
                if isinstance(cb, TimeLimiter) and time_limit is not None:
                    logging.warning(
                        'train_model: parameter time_limit is not None, ignored TimeLimiter in config.'
                    )
                    continue
                callbacks.append(cb)
            elif isinstance(cb, str):
                cb_name, cb_params = _parse_callback_spec(cb)
                if cb_name == 'earlystopping':
                    es_monitor = cb_params.get('monitor',
                                               config['monitored_loss_name'])
                    es_baseline = _resolve_baseline(cb_params, model_name,
                                                    es_monitor)
                    callbacks.append(
                        keras.callbacks.EarlyStopping(
                            monitor=es_monitor,
                            min_delta=EPSILON if 'min_delta' not in cb_params
                            else float(cb_params['min_delta']),
                            patience=int(cb_params.get('patience', 2)),
                            verbose=int(cb_params.get('verbose', 1)),
                            mode=cb_params.get('mode', 'min'),
                            baseline=es_baseline,
                        ))
                elif cb_name == 'tensorboard':
                    callbacks.append(
                        keras.callbacks.TensorBoard(
                            log_dir=os.path.join(LOG_DIRECTORY, model_name)
                            if 'log_dir' not in cb_params else
                            cb_params['log_dir'],
                            batch_size=config['batch_size'],
                            write_graph=True if 'write_graph' not in cb_params
                            else str_to_bool(cb_params['write_graph']),
                        ))
                elif cb_name == 'modelsaver':
                    callbacks.append(
                        ModelSaver(
                            model_name=model_name,
                            period=int(cb_params.get('period', 1)),
                            verbose=int(cb_params.get('verbose', 1)),
                        ))
                elif cb_name == 'epochnumbersaver':
                    callbacks.append(
                        EpochNumberSaver(
                            model_name=model_name,
                            verbose=int(cb_params.get('verbose', 1)),
                        ))
                elif cb_name == 'bestinfosaver':
                    bi_monitor = cb_params.get('monitor',
                                               config['monitored_loss_name'])
                    bi_baseline = _resolve_baseline(cb_params, model_name,
                                                    bi_monitor)
                    callbacks.append(
                        BestInfoSaver(
                            model_name=model_name,
                            monitor=bi_monitor,
                            mode=cb_params.get('mode', 'min'),
                            baseline=bi_baseline,
                            verbose=int(cb_params.get('verbose', 1)),
                        ))
                elif cb_name == 'timelimiter':
                    if time_limit is not None:
                        logging.warning(
                            'train_model: parameter time_limit is not None, ignored TimeLimiter in config.'
                        )
                        continue
                    if 'limit' not in cb_params:
                        raise ValueError(
                            "TimeLimiter's parameter limit is missed.")
                    callbacks.append(
                        TimeLimiter(
                            limit=cb_params['limit'],
                            verbose=int(cb_params.get('verbose', 1)),
                        ))
                else:
                    raise UnknownCallbackNameException(cb)
            else:
                raise TypeError(
                    'Callback must be an instance of keras.callbacks.Callback or a callback name(string).'
                )

    if time_limit is not None:
        # An explicit time limit is honoured even when the config disables
        # callbacks, instead of calling append on None.
        if callbacks is None:
            callbacks = list()
        callbacks.append(TimeLimiter(limit=time_limit, verbose=1))

    rolling_window_size = get_rolling_window_size(model_name)
    generator = SquareExDataGenerator(
        dataset_name=DATASET_NAME_TRAIN,
        rolling_window_size=rolling_window_size,
        row_start=row_start,
        row_end=row_end,
        step=step,
        max_batch_size=config['batch_size'],
        does_shuffle=config['does_shuffle'],
    )
    history = model.fit_generator(
        generator=generator,
        epochs=end_epoch,
        verbose=config['verbose'],
        callbacks=callbacks,
        max_queue_size=config['max_queue_size'],
        use_multiprocessing=config['does_use_multiprocessing'],
        workers=config['worker_number'],
        initial_epoch=initial_epoch,
    )
    return history
def analyze_video(video_file, frame_iterator, clip_iterator, video_predictors,
                  lda_model, lda_vectorizer):
    """Uses yt8m model to analyze video clips of the video file.

    Every clip receives a `predictions` list, the concatenated output of all
    predictors, and an `lda_words` list holding the five vocabulary words
    with the highest reconstructed weight for that clip.

    Args:
      video_file: Path to video file (e.g. mp4)
      frame_iterator: An instance of FrameIterator.
      clip_iterator: An instance of ClipIterator.
      video_predictors: A list of VideoPredictors.
      lda_model: fitted topic model exposing `transform` and `components_`.
      lda_vectorizer: fitted vectorizer exposing `transform` and
        `get_feature_names`.

    Returns:
      A python dict with the keys `video_id` and `clips`, or None when no
      frame could be decoded from the video.
    """
    # Decode video frames from the raw video file.
    rgb_images = list(
        frame_iterator.frame_iterator(
            video_file, every_ms=1000.0 / FLAGS.frames_per_second))
    if not rgb_images:
        logging.warning('Could not get features for %s.', video_file)
        return None

    # Split video frames into video clips.
    video_id = video_file.split('/')[-1].split('.')[0]
    clips = list(clip_iterator.clip_iterator(rgb_images))

    # Downsample the video clips.
    if FLAGS.sample_every_n > 1:
        clips = _downsample(clips, FLAGS.sample_every_n)

    # These depend only on the LDA model and vectorizer, so compute them once
    # instead of once per clip. Rows of components_ are normalised to sum to
    # one.
    topic_word_mat = lda_model.components_ / lda_model.components_.sum(
        axis=1)[:, numpy.newaxis]
    lda_vocab = lda_vectorizer.get_feature_names()

    # Predict results.
    for clip in clips:
        predictions = []
        for predictor in video_predictors:
            predictions.extend(predictor.predict(clip['rgb_images']))
        clip['predictions'] = predictions

        # LDA: collect the predicted words, skipping 'symbol' predictors.
        words = set()
        for prediction in clip['predictions']:
            if prediction['name'].lower() == 'symbol':
                continue
            for result in prediction['results']:
                if result['word']:
                    words.add(result['word'])

        document_word_mat = lda_vectorizer.transform([list(words)])
        document_topic_mat = lda_model.transform(document_word_mat)
        document_word_reconstruct = numpy.matmul(document_topic_mat,
                                                 topic_word_mat)
        # Indices of the five largest reconstructed word weights, descending.
        top_word_ids = document_word_reconstruct.argsort()[0][::-1][:5]
        clip['lda_words'] = [lda_vocab[x] for x in top_word_ids]

    return {'video_id': video_id, 'clips': clips}
def main(unused_argv):
    """Trains the configured model on CartPole-v0, optionally in a TF cluster.

    The cluster and task layout are read from the `TF_CONFIG` environment
    variable; without it the process acts as a single master task. Master and
    worker tasks build the graph (or recover it from a saved meta graph), then
    play episodes inside a `tf.train.Supervisor` managed session, calling the
    model's `before` / `get_action` / `after_action` / `after_episode` /
    `after_batch` / `after` hooks. The master task saves checkpoints during
    and after training.

    NOTE(review): the block nesting here was reconstructed from a
    whitespace-collapsed source; confirm the placement of `init`, the
    Supervisor construction and the export check against the original file.

    Args:
      unused_argv: Ignored.
    """
    tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
    cluster_data = tf_config.get("cluster", None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
    task_data = tf_config.get("task", None) or {"type": "master", "index": 0}
    # Turn the task dict into an object with `.type` / `.index` attributes.
    task = type("TaskSpec", (object, ), task_data)
    is_master = (task.type == "master" and task.index == 0)
    train_dir = FLAGS.train_dir

    if cluster:
        logging.warning("%s: Starting trainer within cluster %s.",
                        task_as_string(task), cluster.as_dict())
        server = start_server(cluster, task)
        target = server.target
        device_fn = tf.train.replica_device_setter(
            ps_device="/job:ps",
            worker_device="/job:%s/task:%d" % (task.type, task.index),
            cluster=cluster)
    else:
        target = ""
        device_fn = ""

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    if is_master and FLAGS.start_new_model:
        remove_training_directory(train_dir, task)

    if not cluster or task.type in ("master", "worker"):
        env = gym.make('CartPole-v0')
        model = find_class_by_name(FLAGS.model, [models])()
        # every how many episodes to do a param update?
        batch_size = FLAGS.batch_size
        last_model_export_step = 0
        export_model_steps = FLAGS.export_model_steps

        with tf.Graph().as_default() as graph:
            meta_filename = get_meta_filename(FLAGS.start_new_model,
                                              train_dir, task)
            if meta_filename:
                logging.warning("using saved model %s", meta_filename)
                saver = recover_model(task, meta_filename)

            with tf.device(device_fn):
                if not meta_filename:
                    global_step = tf.Variable(0, trainable=False,
                                              name="global_step")
                    local_device_protos = device_lib.list_local_devices()
                    gpus = [
                        x.name for x in local_device_protos
                        if x.device_type == 'GPU'
                    ]
                    num_gpus = len(gpus)
                    if num_gpus > 0:
                        logging.warning(
                            "Using the following GPUs to train: %s", gpus)
                        num_towers = num_gpus
                    else:
                        logging.warning("No GPUs found. Training on CPU.")
                        num_towers = 1

                    for tower_index in range(num_towers):
                        # Towers after the first reuse the first one's
                        # variables.
                        with tf.variable_scope(
                                "tower",
                                reuse=True if tower_index > 0 else None):
                            with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                    device="/cpu:0"
                                    if num_gpus != 1 else "/gpu:0"):
                                results = model.build_graph(global_step)
                                model.add_to_collection(results)
                    model.collect()
                    tf.add_to_collection("global_step", global_step)
                    saver = tf.train.Saver(max_to_keep=0,
                                           keep_checkpoint_every_n_hours=0.25)

                init = tf.global_variables_initializer()
                global_step = tf.get_collection("global_step")[0]
                model.get_collection(global_step)

        sv = tf.train.Supervisor(graph,
                                 logdir=train_dir,
                                 init_op=init,
                                 is_chief=is_master,
                                 global_step=global_step,
                                 save_model_secs=3600,
                                 save_summaries_secs=120,
                                 saver=saver)

        # Launch the graph
        running_reward = None
        reward_sum = 0
        episode_number = 1
        total_episodes = FLAGS.total_episodes
        D = 4  # input dimensionality
        # Stays None until the first episode completes, so the final save
        # below cannot hit an unbound name when no episode ever finishes.
        global_step_val = None

        logging.warning("%s: Starting managed session.", task_as_string(task))
        with sv.managed_session(target, config=config) as sess:
            rendering = FLAGS.rendering
            # Obtain an initial observation of the environment
            observation = env.reset()
            model.before(sess)

            while episode_number <= total_episodes:
                if rendering:
                    env.render()
                    time.sleep(1. / 24)

                # Make sure the observation is in a shape the network can
                # handle.
                x = np.reshape(observation, [1, D])

                # Run the policy network and get an action to take.
                action = model.get_action(sess, x)

                # step the environment and get new measurements
                observation, reward, done, info = env.step(action)
                reward_sum += reward
                model.after_action(sess, reward, info)

                if done:
                    episode_number += 1
                    global_step_val = model.after_episode(sess)

                    # If we have completed enough episodes, then update the
                    # policy network with our gradients.
                    if episode_number % batch_size == 0:
                        model.after_batch(sess)

                        # Give a summary of how well our network is doing for
                        # each batch of episodes.
                        if running_reward is None:
                            running_reward = reward_sum
                        else:
                            running_reward = (running_reward * 0.99 +
                                              reward_sum * 0.01)
                        logging.info(
                            'Global step %d. Average reward for episode %f. '
                            ' Total average reward %f.', global_step_val,
                            reward_sum / batch_size,
                            running_reward / batch_size)

                        if reward_sum / batch_size > 200:
                            # The original passed extra positional args to a
                            # message without placeholders, which fails at
                            # emit time; use a real format string instead.
                            logging.info("Task solved in %d episodes!",
                                         episode_number)
                            break

                        reward_sum = 0

                    time_to_export = (
                        (last_model_export_step == 0)
                        or (global_step_val - last_model_export_step >=
                            export_model_steps))
                    if is_master and time_to_export:
                        saver.save(sess, sv.save_path, global_step_val)
                        last_model_export_step = global_step_val

                    observation = env.reset()

            # Final checkpoint once training stops (skipped if no episode ever
            # completed, as there is no step value to tag it with).
            if is_master and global_step_val is not None:
                saver.save(sess, sv.save_path, global_step_val)
                last_model_export_step = global_step_val
            model.after()