Example #1
    def _set_meta_data_(self):
        self.embedding_dimensions = len(self.vectors[0])
        if self.mini_dataset_set:
            self.vocab_size_meta = len(self.vocab)
            self._meta_set_ = True
        else:
            logging.warning('Metadata was not set. Build the mini dataset first.')
Example #2
def _format_tensor(tensor, layer):
    """Reformats the tensor from Python-style array to C-style array."""
    flattened_tensor = tensor.flatten()
    if not sum(flattened_tensor):
        logging.warning("Tensor at layer %d is a zero tensor!", layer)
    parsed_tensor = " ".join([("%ff," % value) for value in flattened_tensor])
    return "{" + parsed_tensor + "}"
Example #3
    def add_single_ground_truth_image_info(self, image_key,
                                           groundtruth_box_tuples,
                                           groundtruth_class_tuples):
        """Adds groundtruth for a single image to be used for evaluation.

    Args:
      image_key: A unique string/integer identifier for the image.
      groundtruth_box_tuples: A numpy array of structures with the shape
          [M, 1], representing M tuples, each tuple containing the same number
          of named bounding boxes.
          Each box is of the format [y_min, x_min, y_max, x_max].
      groundtruth_class_tuples: A numpy array of structures with shape [M, 1],
          representing the class labels of the corresponding bounding boxes and
          possibly additional classes.
    """
        if image_key in self._groundtruth_box_tuples:
            logging.warning(
                'image %s has already been added to the ground truth database.',
                image_key)
            return

        self._groundtruth_box_tuples[image_key] = groundtruth_box_tuples
        self._groundtruth_class_tuples[image_key] = groundtruth_class_tuples

        self._update_groundtruth_statistics(groundtruth_class_tuples)
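The groundtruth arguments are structured numpy arrays. A minimal sketch of plausible inputs, assuming a VRD-style dtype with two named boxes per tuple (the real vrd_box_data_type and label_data_type referenced in Example #7 may differ, and `evaluator` stands for an instance of this class):

import numpy as np

box_dtype = np.dtype([('subject', 'f4', (4,)), ('object', 'f4', (4,))])
label_dtype = np.dtype([('subject', 'i4'), ('object', 'i4'), ('relation', 'i4')])
boxes = np.zeros((2, 1), dtype=box_dtype)    # M=2 groundtruth tuples
labels = np.zeros((2, 1), dtype=label_dtype)
evaluator.add_single_ground_truth_image_info('image_0', boxes, labels)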
Example #4
def get_variables_available_in_checkpoint(variables,
                                          checkpoint_path,
                                          include_global_step=True):
    """Returns the subset of variables available in the checkpoint.

  Inspects given checkpoint and returns the subset of variables that are
  available in it.

  TODO(rathodv): force input and output to be a dictionary.

  Args:
    variables: a list or dictionary of variables to find in checkpoint.
    checkpoint_path: path to the checkpoint to restore variables from.
    include_global_step: whether to include `global_step` variable, if it
      exists. Default True.

  Returns:
    A list or dictionary of variables.
  Raises:
    ValueError: if `variables` is not a list or dict.
  """
    if isinstance(variables, list):
        variable_names_map = {}
        for variable in variables:
            if isinstance(variable, tf_variables.PartitionedVariable):
                name = variable.name
            else:
                name = variable.op.name
            variable_names_map[name] = variable
    elif isinstance(variables, dict):
        variable_names_map = variables
    else:
        raise ValueError('`variables` is expected to be a list or dict.')
    ckpt_reader = tf.train.NewCheckpointReader(checkpoint_path)
    ckpt_vars_to_shape_map = ckpt_reader.get_variable_to_shape_map()
    if not include_global_step:
        ckpt_vars_to_shape_map.pop(tf.GraphKeys.GLOBAL_STEP, None)
    vars_in_ckpt = {}
    for variable_name, variable in sorted(variable_names_map.items()):
        if variable_name in ckpt_vars_to_shape_map:
            if ckpt_vars_to_shape_map[variable_name] == variable.shape.as_list():
                vars_in_ckpt[variable_name] = variable
            else:
                logging.warning(
                    'Variable [%s] is available in checkpoint, but has an '
                    'incompatible shape with model variable. Checkpoint '
                    'shape: [%s], model variable shape: [%s]. This '
                    'variable will not be initialized from the checkpoint.',
                    variable_name, ckpt_vars_to_shape_map[variable_name],
                    variable.shape.as_list())
        # Variables not found in the checkpoint are silently skipped.
    if isinstance(variables, list):
        return list(vars_in_ckpt.values())
    return vars_in_ckpt
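A minimal restore sketch for this helper, assuming a TF1-style graph; the checkpoint path is hypothetical:

# Restore only the variables present in the checkpoint with matching shapes.
available = get_variables_available_in_checkpoint(
    tf.global_variables(), '/tmp/model.ckpt')  # hypothetical path
saver = tf.train.Saver(available)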
Example #5
    def evaluate(self):
        """Computes evaluation result.

    Returns:
      A named tuple with the following fields -
        average_precision: a float number corresponding to average precision.
        precisions: an array of precisions.
        recalls: an array of recalls.
        recall@50: recall computed on 50 top-scoring samples.
        recall@100: recall computed on 100 top-scoring samples.
        median_rank@50: median rank computed on 50 top-scoring samples.
        median_rank@100: median rank computed on 100 top-scoring samples.
    """
        if self._num_gt_instances == 0:
            logging.warning('No ground truth instances')

        if not self._scores:
            scores = np.array([], dtype=float)
            tp_fp_labels = np.array([], dtype=bool)
            relation_field_values = np.array([])
        else:
            scores = np.concatenate(self._scores)
            tp_fp_labels = np.concatenate(self._tp_fp_labels)
            relation_field_values = np.concatenate(self._relation_field_values)

        for relation_field_value in self._num_gt_instances_per_relationship:
            precisions, recalls = metrics.compute_precision_recall(
                scores[relation_field_values == relation_field_value],
                tp_fp_labels[relation_field_values == relation_field_value],
                self._num_gt_instances_per_relationship[relation_field_value])
            self._average_precisions[
                relation_field_value] = metrics.compute_average_precision(
                    precisions, recalls)

        self._mean_average_precision = np.mean(
            list(self._average_precisions.values()))

        self._precisions, self._recalls = metrics.compute_precision_recall(
            scores, tp_fp_labels, self._num_gt_instances)
        self._weighted_average_precision = metrics.compute_average_precision(
            self._precisions, self._recalls)

        self._recall_50 = (metrics.compute_recall_at_k(self._tp_fp_labels,
                                                       self._num_gt_instances,
                                                       50))
        self._median_rank_50 = (metrics.compute_median_rank_at_k(
            self._tp_fp_labels, 50))
        self._recall_100 = (metrics.compute_recall_at_k(
            self._tp_fp_labels, self._num_gt_instances, 100))
        self._median_rank_100 = (metrics.compute_median_rank_at_k(
            self._tp_fp_labels, 100))

        return VRDDetectionEvalMetrics(
            self._weighted_average_precision, self._mean_average_precision,
            self._average_precisions, self._precisions, self._recalls,
            self._recall_50, self._recall_100, self._median_rank_50,
            self._median_rank_100)
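A hypothetical read-out of the result; the field names are assumed to mirror the positional arguments passed to VRDDetectionEvalMetrics above:

results = evaluator.evaluate()  # evaluator: an instance of this class
print('wAP=%f  mAP=%f  recall@50=%f' %
      (results.weighted_average_precision, results.mean_average_precision,
       results.recall_50))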
Example #6
def remove_training_directory(train_dir, task):
    """Removes the training directory."""
    try:
        logging.warning("%s: Removing existing train directory.",
                        task_as_string(task))
        gfile.DeleteRecursively(train_dir)
    except Exception:
        logging.error(
            "%s: Failed to delete directory %s when starting a new model. "
            "Please delete it manually and try again.",
            task_as_string(task), train_dir)
Example #7
    def add_single_detected_image_info(self, image_id, detections_dict):
        """Adds detections for a single image to be used for evaluation.

    Args:
      image_id: A unique string/integer identifier for the image.
      detections_dict: A dictionary containing -
        standard_fields.DetectionResultFields.detection_boxes: A numpy array of
          structures with shape [N, 1], representing N tuples, each tuple
          containing the same number of named bounding boxes.
          Each box is of the format [y_min, x_min, y_max, x_max] (as an example
          see datatype vrd_box_data_type, single_box_data_type above).
        standard_fields.DetectionResultFields.detection_scores: float32 numpy
          array of shape [N] containing detection scores for the boxes.
        standard_fields.DetectionResultFields.detection_classes: A numpy array
          of structures shape [N, 1], representing the class labels of the
          corresponding bounding boxes and possibly additional classes (see
          datatype label_data_type above).
    """
        if image_id not in self._image_ids:
            logging.warning('No groundtruth for the image with id %s.',
                            image_id)
            # The evaluator assumes groundtruth is inserted first; if it is
            # not, register the image with empty label sets so evaluation can
            # still proceed.
            self._image_ids.update([image_id])
            self._negative_labels[image_id] = np.array([])
            self._evaluatable_labels[image_id] = np.array([])

        num_detections = detections_dict[
            standard_fields.DetectionResultFields.detection_boxes].shape[0]
        detection_class_tuples = detections_dict[
            standard_fields.DetectionResultFields.detection_classes]
        detection_box_tuples = detections_dict[
            standard_fields.DetectionResultFields.detection_boxes]
        negative_selector = np.zeros(num_detections, dtype=bool)
        selector = np.ones(num_detections, dtype=bool)
        # Only check boxable labels
        for field in detection_box_tuples.dtype.fields:
            # Verify if one of the labels is negative (this is sure FP)
            negative_selector |= np.isin(detection_class_tuples[field],
                                         self._negative_labels[image_id])
            # Verify if all labels are verified
            selector &= np.isin(detection_class_tuples[field],
                                self._evaluatable_labels[image_id])
        selector |= negative_selector
        self._evaluation.add_single_detected_image_info(
            image_key=image_id,
            detected_box_tuples=self._process_detection_boxes(
                detection_box_tuples[selector]),
            detected_scores=detections_dict[
                standard_fields.DetectionResultFields.detection_scores]
            [selector],
            detected_class_tuples=detection_class_tuples[selector])
Example #8
def predict(model_name,
            model=None,
            row_start=None,
            row_end=None,
            custom_objects=None):
    if 'batch_size' not in config:
        config['batch_size'] = default_batch_size
    if 'max_queue_size' not in config:
        config['max_queue_size'] = default_max_queue_size
    if 'does_use_multiprocessing' not in config:
        config['does_use_multiprocessing'] = default_does_use_multiprocessing
    if 'worker_number' not in config:
        config['worker_number'] = default_worker_number
    if 'verbose' not in config:
        config['verbose'] = default_verbose
    if model is None:
        if custom_objects is None:
            custom_objects = custom_metrics
        model = load_model(model_name=model_name,
                           custom_objects=custom_objects,
                           does_compile=True)
        if model is None:
            raise NoTrainedModelException(model_name)
    rolling_window_size = get_rolling_window_size(model_name)
    generator = DataGenerator(
        dataset_name=DATASET_NAME_PREDICT,
        rolling_window_size=rolling_window_size,
        row_start=row_start,
        row_end=row_end,
        max_batch_size=config['batch_size'],
        does_shuffle=False,  # NOT shuffle!
    )
    snpr = generator.get_sample_number_per_row()
    if config['batch_size'] % snpr != 0:
        logging.warning(
            'predict: batch_size (%d) is not a multiple of the %d samples '
            'per row. Some inputs will be ignored.', config['batch_size'],
            snpr)

    result = model.predict_generator(
        generator=generator,
        max_queue_size=config['max_queue_size'],
        use_multiprocessing=config['does_use_multiprocessing'],
        workers=config['worker_number'],
        verbose=config['verbose'],
    )
    return result
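A hypothetical call, assuming the module-level config and defaults this function reads are in scope and a model named 'my_model' has already been trained:

predictions = predict('my_model', row_start=0, row_end=1000)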
Example #9
    def next_batch(self):
        """Return a Batch from the batch queue.

        If mode='infer' then each batch contains a single example repeated beam_size-many times; this is necessary for beam search.

        Returns:
          batch: a Batch object, or None if we're in single_pass mode and we've exhausted the dataset.
        """
        # If the batch queue is empty, print a warning
        if self._batch_queue.qsize() == 0:
            log.warning(
                'Bucket input queue is empty when calling next_batch. Bucket queue size: %i, Input queue size: %i',
                self._batch_queue.qsize(), self._example_queue.qsize())
            if self._single_pass and self._finished_reading:
                log.info("Finished reading dataset in single_pass mode.")
                return None

        batch = self._batch_queue.get()  # get the next Batch
        return batch
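A minimal consumption-loop sketch; `batcher` stands for an instance of this class and `run_decode_step` is a hypothetical downstream step:

while True:
    batch = batcher.next_batch()
    if batch is None:  # single_pass mode and the dataset is exhausted
        break
    run_decode_step(batch)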
Example #10
def get_tfrecord_files(train_or_eval_files, num_workers=1):
    """Split dataset by worker.

  Args:
      num_workers: String, the name of the dataset.
      file_pattern: The file pattern to use for matching the dataset source files.

  Returns:
      A file list.

  Raises:
      ValueError: If the dataset is unknown.
  """

    if FLAGS.dataset_name == 'mock':
        return []
    ret = []
    all_tfrecord_files = []
    dataset_dir = FLAGS.dataset_dir
    if dataset_dir is None:
        raise ValueError('Need to specify dataset, mock or real.')

    assert train_or_eval_files is not None
    files_list = train_or_eval_files.split(',')
    for file_name in files_list:
        all_tfrecord_files.append(os.path.join(dataset_dir, file_name))
    if (len(all_tfrecord_files) // num_workers) <= 0:
        raise ValueError(
            'Each worker needs at least one file, but only {} files were '
            'given for {} workers.'.format(len(all_tfrecord_files),
                                           num_workers))
    if len(all_tfrecord_files) % num_workers > 0:
        logging.warning(
            "{} files can not be distributed equally between {} workers.".
            format(len(all_tfrecord_files), num_workers))
    all_tfrecord_files.sort()
    for i in range(len(all_tfrecord_files)):
        if i % num_workers == FLAGS.task_index:
            ret.append(all_tfrecord_files[i])
    logging.info('Worker Host {} handles {} files including {}.'.format(
        FLAGS.task_index, len(ret), ret))
    return ret
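The round-robin sharding in the loop above can be illustrated without FLAGS; a self-contained sketch with hypothetical file names:

files = ['a.tfrecord', 'b.tfrecord', 'c.tfrecord', 'd.tfrecord', 'e.tfrecord']
num_workers, task_index = 2, 0
shard = [f for i, f in enumerate(sorted(files)) if i % num_workers == task_index]
# Worker 0 gets ['a.tfrecord', 'c.tfrecord', 'e.tfrecord']; worker 1 the rest.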
Example #11
def get_vars_available_in_ckpt(name_to_var_map,
                               checkpoint_path,
                               include_global_step=True):
    """Returns the variable name to variable mapping used to initialize an
  `tf.train.Saver` object. 

  Inspects given checkpoint and returns the subset of variables that are
  available in it.

  Args:
    name_to_var_map: a dict mapping from variable name to variable.
    checkpoint_path: string scalar, path to the checkpoint to restore variables
      from.
    include_global_step: bool scalar, whether to include the `global_step`
      variable, if it exists. Defaults to True.

  Returns:
    vars_in_ckpt: a dict mapping from variable name to variable.
  """

    reader = tf.train.NewCheckpointReader(checkpoint_path)
    vars_to_shape_map = reader.get_variable_to_shape_map()

    if not include_global_step:
        vars_to_shape_map.pop(tf.GraphKeys.GLOBAL_STEP, None)

    vars_in_ckpt = {}
    for var_name, var in sorted(name_to_var_map.items()):
        if var_name in vars_to_shape_map:
            if vars_to_shape_map[var_name] == var.shape.as_list():
                vars_in_ckpt[var_name] = var
            else:
                logging.warning(
                    'Variable [%s] is available in checkpoint, but has an '
                    'incompatible shape with model variable.', var_name)
        else:
            logging.warning('Variable [%s] is not available in checkpoint',
                            var_name)

    return vars_in_ckpt
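A minimal sketch of building the name-to-variable map and wiring the result into a Saver, assuming a TF1 graph; the path is hypothetical:

name_to_var = {v.op.name: v for v in tf.global_variables()}
restorable = get_vars_available_in_ckpt(name_to_var, '/tmp/model.ckpt')
saver = tf.train.Saver(restorable)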
Example #12
    def text_generator(self, example_generator):
        """Generates article and abstract text from tf.Example.

        Args:
          example_generator: a generator of tf.Examples from file. See data.example_generator"""
        while True:
            e = next(example_generator)  # e is a tf.Example
            try:
                # The article and abstract text were saved under the keys
                # 'article' and 'abstract' in the data files.
                article_text = e.features.feature['article'].bytes_list.value[0].decode()
                abstract_text = e.features.feature['abstract'].bytes_list.value[0].decode()
            except ValueError:
                log.error('Failed to get article or abstract from example')
                continue
            if len(article_text) == 0:
                # See https://github.com/abisee/pointer-generator/issues/1
                log.warning('Found an example with empty article text. Skipping it.')
            else:
                yield (article_text, abstract_text)
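A hypothetical driver for the generator above; the exact signature of data.example_generator is assumed:

# batcher stands for an instance of this class.
gen = batcher.text_generator(
    data.example_generator('train_*.bin', single_pass=True))  # hypothetical args
article, abstract = next(gen)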
Example #13
def get_meta_filename(start_new_model, train_dir, task):
    if start_new_model:
        logging.warning(
            "%s: Flag 'start_new_model' is set. Building a new model.",
            task_as_string(task))
        return None

    latest_checkpoint = tf.train.latest_checkpoint(train_dir)
    if not latest_checkpoint:
        logging.warning("%s: No checkpoint file found. Building a new model.",
                        task_as_string(task))
        return None

    meta_filename = latest_checkpoint + ".meta"
    if not gfile.Exists(meta_filename):
        logging.warning("%s: No meta graph file found. Building a new model.",
                        task_as_string(task))
        return None
    else:
        return meta_filename
Example #14
def recover_model(task, meta_filename):
    logging.warning("%s: Restoring from meta graph file %s",
                    task_as_string(task), meta_filename)
    return tf.train.import_meta_graph(meta_filename)
Example #15
def main(unused_argv):
    env = json.loads(os.environ.get("TF_CONFIG", "{}"))

    cluster_data = env.get("cluster", None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

    task_data = env.get("task", None) or {"type": "master", "index": 0}
    task = type("TaskSpec", (object, ), task_data)
    is_master = (task.type == "master" and task.index == 0)
    train_dir = FLAGS.train_dir
    D = 4  # input dimensionality

    if cluster:
        logging.warning("%s: Starting trainer within cluster %s.",
                        task_as_string(task), cluster.as_dict())
        server = start_server(cluster, task)
        target = server.target
        device_fn = tf.train.replica_device_setter(
            ps_device="/job:ps",
            worker_device="/job:%s/task:%d" % (task.type, task.index),
            cluster=cluster)
    else:
        target = ""
        device_fn = ""

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    if not cluster or task.type == "master" or task.type == "worker":
        model = find_class_by_name(FLAGS.model, [models])()

        with tf.Graph().as_default() as graph:
            meta_filename = get_meta_filename(False, train_dir, task)
            if meta_filename:
                logging.warning("using saved model %s", meta_filename)
                saver = recover_model(task, meta_filename)
            else:
                raise IOError("meta file not found")

            with tf.device(device_fn):
                init = tf.global_variables_initializer()
                global_step = tf.get_collection("global_step")[0]
                model.get_collection(global_step)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=tf.get_default_graph())

        sv = tf.train.Supervisor(graph,
                                 logdir=train_dir,
                                 init_op=init,
                                 is_chief=is_master,
                                 global_step=global_step,
                                 save_model_secs=3600,
                                 save_summaries_secs=120,
                                 saver=saver)

        # Launch the graph
        xs, hs, dlogps, drs, ys, tfps = [], [], [], [], [], []
        running_reward_sum = 0
        reward_sum = 0
        episode_number = 0
        total_episodes = 10

        logging.warning("%s: Starting managed session.", task_as_string(task))
        with sv.managed_session(target, config=config) as sess:

            env = env_wrapper.Service()
            rendering = False
            # Obtain an initial observation of the environment.
            observation = env.reset()

            model.before(sess)
            while episode_number < total_episodes:

                # Make sure the observation is in a shape the network can handle.
                x = np.reshape(observation, [1, D])
                action = model.get_action(sess, x)

                # step the environment and get new measurements
                observation, reward, done, info = env.step(action)
                reward_sum += reward
                running_reward_sum += reward

                # Record reward (must be done after env.step() to get the
                # reward for the previous action).
                drs.append(reward)
                if done:
                    episode_number += 1
                    logging.info(
                        'Reward for episode %d of %d = %f.  Total average reward %f.',
                        episode_number, total_episodes, reward_sum,
                        running_reward_sum / episode_number)
                    reward_sum = 0
                    observation = env.reset()
                    done = False

            print ""
            logging.info(
                'Total reward: %d, Avg reward: %f' %
                (running_reward_sum, running_reward_sum / total_episodes))

            avg_reward_sum = running_reward_sum / total_episodes
            summary_writer.add_summary(
                MakeSummary("GlobalStep/Eval_TotalRewardSum",
                            running_reward_sum), running_reward_sum)
            summary_writer.add_summary(
                MakeSummary("GlobalStep/Eval_AvgRewardSum", avg_reward_sum),
                avg_reward_sum)
            summary_writer.flush()

            results = env.submit(conf.kaggle_user, conf.kaggle_passwd)
            print(results)
Example #16
def define_data_input(model, queue_batch=None):
  """Adds TF ops to load input data."""

  label_volume_map = {}
  for vol in FLAGS.label_volumes.split(','):
    volname, path, dataset = vol.split(':')
    label_volume_map[volname] = h5py.File(path)[dataset]

  image_volume_map = {}
  for vol in FLAGS.data_volumes.split(','):
    volname, path, dataset = vol.split(':')
    image_volume_map[volname] = h5py.File(path)[dataset]

  if queue_batch is None:
    queue_batch = FLAGS.batch_size

  # Fetch sizes of images and labels
  label_size = train_labels_size(model)
  image_size = train_image_size(model)

  label_radii = (label_size // 2).tolist()
  label_size = label_size.tolist()
  image_radii = (image_size // 2).tolist()
  image_size = image_size.tolist()

  # Fetch a single coordinate and volume name from a queue reading the
  # coordinate files or from saved hard/important examples
  import os.path
  if os.path.isfile(FLAGS.train_coords):
    logging.info('{} exists.'.format(FLAGS.train_coords))
  else:
    logging.error('{} does not exist.'.format(FLAGS.train_coords))
  if FLAGS.sharding_rule == 0:
    coord, volname = inputs.load_patch_coordinates(FLAGS.train_coords)
  elif FLAGS.sharding_rule == 1 and 'horovod' in sys.modules:
    d = tf.data.TFRecordDataset(FLAGS.train_coords, compression_type='GZIP')
    d = d.shard(hvd.size(), hvd.rank())
    d = d.map(parser_fn)
    iterator = d.make_one_shot_iterator()
    coord, volname = iterator.get_next()
  else:
    logging.warning("You need to install Horovod to use sharding. Turning sharding off..")
    FLAGS.sharding_rule = 0
    coord, volname = inputs.load_patch_coordinates(FLAGS.train_coords)

  # Load object labels (segmentation).
  labels = inputs.load_from_numpylike(
      coord, volname, label_size, label_volume_map)

  label_shape = [1] + label_size[::-1] + [1]
  #label_shape = [1] + [1] + label_size[::-1] # NCDHW
  labels = tf.reshape(labels, label_shape)

  loss_weights = tf.constant(np.ones(label_shape, dtype=np.float32))

  # Load image data.
  patch = inputs.load_from_numpylike(
      coord, volname, image_size, image_volume_map)
  data_shape = [1] + image_size[::-1] + [1]
  patch = tf.reshape(patch, shape=data_shape)

  if ((FLAGS.image_stddev is None or FLAGS.image_mean is None) and
      not FLAGS.image_offset_scale_map):
    raise ValueError('--image_mean, --image_stddev or --image_offset_scale_map '
                     'need to be defined')

  # Convert segmentation into a soft object mask.
  lom = tf.logical_and(
      labels > 0,
      tf.equal(labels, labels[0,
                              label_radii[2],
                              label_radii[1],
                              label_radii[0],
                              0]))
  labels = inputs.soften_labels(lom)

  # Apply basic augmentations.
  transform_axes = augmentation.PermuteAndReflect(
      rank=5, permutable_axes=_get_permutable_axes(),
      reflectable_axes=_get_reflectable_axes())
  labels = transform_axes(labels)
  patch = transform_axes(patch)
  loss_weights = transform_axes(loss_weights)

  # Normalize image data.
  patch = inputs.offset_and_scale_patches(
      patch, volname[0],
      offset_scale_map=_get_offset_and_scale_map(),
      default_offset=FLAGS.image_mean,
      default_scale=FLAGS.image_stddev)

  # Create a batch of examples. Note that any TF operation before this line
  # will be hidden behind a queue, so expensive/slow ops can take advantage
  # of multithreading.
  #MK TODO: check num_threads usage here
  patches, labels, loss_weights = tf.train.shuffle_batch(
      [patch, labels, loss_weights], queue_batch,
      num_threads=max(1, FLAGS.batch_size // 2),
      capacity=32 * FLAGS.batch_size,
      min_after_dequeue=4 * FLAGS.batch_size,
      enqueue_many=True)

  return patches, labels, loss_weights, coord, volname
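The --label_volumes and --data_volumes flags parsed at the top of this function use comma-separated 'volname:path:dataset' triples; a self-contained sketch with hypothetical values:

flag_value = 'vol1:/data/labels1.h5:stack,vol2:/data/labels2.h5:stack'
for vol in flag_value.split(','):
    volname, path, dataset = vol.split(':')
    print(volname, path, dataset)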
Example #17
    def __init__(self,
                 num_units,
                 num_dims=1,
                 input_dims=None,
                 output_dims=None,
                 priority_dims=None,
                 non_recurrent_dims=None,
                 tied=False,
                 cell_fn=None,
                 non_recurrent_fn=None,
                 state_is_tuple=True,
                 output_is_tuple=True):
        """Initialize the parameters of a Grid RNN cell

        Args:
          num_units: int, The number of units in all dimensions of this GridRNN cell
          num_dims: int, Number of dimensions of this grid.
          input_dims: int or list, List of dimensions which will receive input data.
          output_dims: int or list, List of dimensions from which the output will be
            recorded.
          priority_dims: int or list, List of dimensions to be considered as
            priority dimensions.
                  If None, no dimension is prioritized.
          non_recurrent_dims: int or list, List of dimensions that are not
            recurrent.
                  The transfer function for non-recurrent dimensions is specified
                    via `non_recurrent_fn`, which is
                    default to be `tensorflow.nn.relu`.
          tied: bool, Whether to share the weights among the dimensions of this
            GridRNN cell.
                  If there are non-recurrent dimensions in the grid, weights are
                    shared between each group of recurrent and non-recurrent
                    dimensions.
          cell_fn: function, a function which returns the recurrent cell object.
            Has to be in the following signature:
                  ```
                  def cell_func(num_units):
                    # ...
                  ```
                  and returns an object of type `RNNCell`. If None, LSTMCell with
                    default parameters will be used.
            Note that if you use a custom RNNCell (with `cell_fn`), it is your
            responsibility to make sure the inner cell use `state_is_tuple=True`.

          non_recurrent_fn: a tensorflow Op that will be the transfer function of
            the non-recurrent dimensions
          state_is_tuple: If True, accepted and returned states are tuples of the
            states of the recurrent dimensions. If False, they are concatenated
            along the column axis. The latter behavior will soon be deprecated.

            Note that if you use a custom RNNCell (with `cell_fn`), it is your
            responsibility to make sure the inner cell use `state_is_tuple=True`.

          output_is_tuple: If True, the output is a tuple of the outputs of the
            recurrent dimensions. If False, they are concatenated along the
            column axis. The later behavior will soon be deprecated.

        Raises:
          TypeError: if cell_fn does not return an RNNCell instance.
        """
        if not state_is_tuple:
            logging.warning('%s: Using a concatenated state is slower and will '
                            'soon be deprecated.  Use state_is_tuple=True.', self)
        if not output_is_tuple:
            logging.warning('%s: Using a concatenated output is slower and will '
                            'soon be deprecated.  Use output_is_tuple=True.', self)

        if num_dims < 1:
            raise ValueError('dims must be >= 1: {}'.format(num_dims))

        self._config = _parse_rnn_config(num_dims, input_dims, output_dims,
                                         priority_dims, non_recurrent_dims,
                                         non_recurrent_fn or nn.selu, tied,
                                         num_units)

        self._state_is_tuple = state_is_tuple
        self._output_is_tuple = output_is_tuple

        if cell_fn is None:
            my_cell_fn = functools.partial(
                rnn.LSTMCell, num_units=num_units, state_is_tuple=state_is_tuple)
        else:
            def my_cell_fn(): return cell_fn(num_units)
        if tied:
            self._cells = [my_cell_fn()] * num_dims
        else:
            self._cells = [my_cell_fn() for _ in range(num_dims)]
        if not isinstance(self._cells[0], rnn.RNNCell):
            raise TypeError('cell_fn must return an RNNCell instance, saw: %s' %
                            type(self._cells[0]))

        if self._output_is_tuple:
            self._output_size = tuple(self._cells[0].output_size
                                      for _ in self._config.outputs)
        else:
            self._output_size = self._cells[0].output_size * \
                len(self._config.outputs)

        if self._state_is_tuple:
            self._state_size = tuple(self._cells[0].state_size
                                     for _ in self._config.recurrents)
        else:
            self._state_size = self._cell_state_size() * len(self._config.recurrents)
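A hypothetical construction of a 2-D grid cell with input and output on dimension 0; the enclosing class name is assumed to be GridRNNCell:

cell = GridRNNCell(num_units=64, num_dims=2, input_dims=0, output_dims=0,
                   priority_dims=0, tied=False)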
Example #18
def train_model(model_name,
                model,
                row_start=None,
                row_end=None,
                step=None,
                initial_epoch=0,
                end_epoch=1,
                time_limit=None):
    if initial_epoch >= end_epoch:
        logging.error('initial_epoch(%d) >= end_epoch(%d).', initial_epoch,
                      end_epoch)
        return None
    if 'batch_size' not in config:
        config['batch_size'] = default_batch_size
    if 'does_shuffle' not in config:
        config['does_shuffle'] = default_does_shuffle
    if 'callbacks' not in config:
        config['callbacks'] = default_callbacks
    if 'monitored_loss_name' not in config:
        config['monitored_loss_name'] = default_monitored_loss_name
    if 'max_queue_size' not in config:
        config['max_queue_size'] = default_max_queue_size
    if 'does_use_multiprocessing' not in config:
        config['does_use_multiprocessing'] = default_does_use_multiprocessing
    if 'worker_number' not in config:
        config['worker_number'] = default_worker_number
    if 'verbose' not in config:
        config['verbose'] = default_verbose
    callbacks = list() if config['callbacks'] is not None else None
    if callbacks is not None:
        for cb in config['callbacks']:
            if isinstance(cb, keras.callbacks.Callback):
                if isinstance(cb, TimeLimiter) and time_limit is not None:
                    logging.warning(
                        'train_model: parameter time_limit is not None, ignored TimeLimiter in config.'
                    )
                    continue
                callbacks.append(cb)
            elif isinstance(cb, str):
                cb_str = cb.lower()
                cb_str = re.sub(pattern=_remove_pattern,
                                repl='',
                                string=cb_str)
                sep_idx = cb_str.find(':')
                cb_params = dict()
                if sep_idx >= 0:
                    cb_name = cb_str[:sep_idx]
                    cb_params_strs = cb_str[sep_idx + 1:].split(',')
                    for cb_param_str in cb_params_strs:
                        eq_idx = cb_param_str.find('=')
                        if eq_idx >= 0:
                            cb_params[cb_param_str[:eq_idx]] = (
                                cb_param_str[eq_idx + 1:])
                        else:
                            cb_params[cb_param_str] = '1'
                else:
                    cb_name = cb_str
                if cb_name == 'earlystopping':
                    es_monitor = cb_params.get(
                        'monitor', config['monitored_loss_name'])
                    if 'baseline' not in cb_params:
                        _, es_baseline = load_best_info(
                            model_name=model_name, monitor_name=es_monitor)
                    else:
                        es_baseline = float(cb_params['baseline'])
                    callbacks.append(
                        keras.callbacks.EarlyStopping(
                            monitor=es_monitor,
                            min_delta=EPSILON if 'min_delta' not in cb_params
                            else float(cb_params['min_delta']),
                            patience=2 if 'patience' not in cb_params else int(
                                cb_params['patience']),
                            verbose=1 if 'verbose' not in cb_params else int(
                                cb_params['verbose']),
                            mode='min'
                            if 'mode' not in cb_params else cb_params['mode'],
                            baseline=es_baseline,
                        ))
                elif cb_name == 'tensorboard':
                    callbacks.append(
                        keras.callbacks.TensorBoard(
                            log_dir=os.path.join(LOG_DIRECTORY, model_name)
                            if 'log_dir' not in cb_params else
                            cb_params['log_dir'],
                            batch_size=config['batch_size'],
                            write_graph=True if 'write_graph' not in cb_params
                            else str_to_bool(cb_params['write_graph']),
                        ))
                elif cb_name == 'modelsaver':
                    callbacks.append(
                        ModelSaver(
                            model_name=model_name,
                            period=1 if 'period' not in cb_params else int(
                                cb_params['period']),
                            verbose=1 if 'verbose' not in cb_params else int(
                                cb_params['verbose']),
                        ))
                elif cb_name == 'epochnumbersaver':
                    callbacks.append(
                        EpochNumberSaver(
                            model_name=model_name,
                            verbose=1 if 'verbose' not in cb_params else int(
                                cb_params['verbose']),
                        ))
                elif cb_name == 'bestinfosaver':
                    bi_monitor = cb_params.get(
                        'monitor', config['monitored_loss_name'])
                    if 'baseline' not in cb_params:
                        _, bi_baseline = load_best_info(
                            model_name=model_name, monitor_name=bi_monitor)
                    else:
                        bi_baseline = float(cb_params['baseline'])
                    callbacks.append(
                        BestInfoSaver(
                            model_name=model_name,
                            monitor=bi_monitor,
                            mode='min'
                            if 'mode' not in cb_params else cb_params['mode'],
                            baseline=bi_baseline,
                            verbose=1 if 'verbose' not in cb_params else int(
                                cb_params['verbose']),
                        ))
                elif cb_name == 'timelimiter':
                    if time_limit is not None:
                        logging.warning(
                            'train_model: parameter time_limit is not None, ignored TimeLimiter in config.'
                        )
                        continue
                    if 'limit' not in cb_params:
                        raise ValueError(
                            "TimeLimiter's parameter 'limit' is missing.")
                    callbacks.append(
                        TimeLimiter(
                            limit=cb_params['limit'],
                            verbose=1 if 'verbose' not in cb_params else int(
                                cb_params['verbose']),
                        ))
                else:
                    raise UnknownCallbackNameException(cb)
            else:
                raise TypeError(
                    'Callback must be an instance of keras.callbacks.Callback or a callback name(string).'
                )
    if time_limit is not None:
        callbacks.append(TimeLimiter(limit=time_limit, verbose=1))
    rolling_window_size = get_rolling_window_size(model_name)
    generator = SquareExDataGenerator(
        dataset_name=DATASET_NAME_TRAIN,
        rolling_window_size=rolling_window_size,
        row_start=row_start,
        row_end=row_end,
        step=step,
        max_batch_size=config['batch_size'],
        does_shuffle=config['does_shuffle'],
    )
    history = model.fit_generator(
        generator=generator,
        epochs=end_epoch,
        verbose=config['verbose'],
        callbacks=callbacks,
        max_queue_size=config['max_queue_size'],
        use_multiprocessing=config['does_use_multiprocessing'],
        workers=config['worker_number'],
        initial_epoch=initial_epoch,
    )
    return history
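A hypothetical call: resume training at epoch 5 and stop at epoch 10, with a time limit in whatever unit TimeLimiter accepts (seconds are assumed here):

history = train_model('my_model', model, initial_epoch=5, end_epoch=10,
                      time_limit=3600)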
Example #19
def analyze_video(video_file, frame_iterator, clip_iterator, video_predictors,
                  lda_model, lda_vectorizer):
    """Uses yt8m model to analyze video clips of the video file.

  Args:
    video_file: Path to video file (e.g. mp4).
    frame_iterator: An instance of FrameIterator.
    clip_iterator: An instance of ClipIterator.
    video_predictors: A list of VideoPredictors.
    lda_model: A fitted LDA topic model.
    lda_vectorizer: The vectorizer used to map words into the LDA vocabulary.

  Returns:
    A python dict containing the results.
  """
    # Decode video frames from the raw video file.
    rgb_images = []
    for rgb in frame_iterator.frame_iterator(
            video_file, every_ms=1000.0 / FLAGS.frames_per_second):
        rgb_images.append(rgb)

    if not rgb_images:
        logging.warning('Could not get features for %s.', video_file)
        return None

    # Split video frames into video clips.
    video_id = video_file.split('/')[-1].split('.')[0]
    clips = list(clip_iterator.clip_iterator(rgb_images))

    # Downsample the video clips.
    if FLAGS.sample_every_n > 1:
        clips = _downsample(clips, FLAGS.sample_every_n)

    # Predict results.
    for clip in clips:
        predictions = []
        for predictor in video_predictors:
            predictions.extend(predictor.predict(clip['rgb_images']))
        clip['predictions'] = predictions

        # LDA.
        words = set()
        for prediction in clip['predictions']:
            if prediction['name'].lower() == 'symbol':
                continue
            for result in prediction['results']:
                if result['word']:
                    words.add(result['word'])

        document_word_mat = lda_vectorizer.transform([list(words)])
        document_topic_mat = lda_model.transform(document_word_mat)
        topic_word_mat = lda_model.components_ / lda_model.components_.sum(
            axis=1)[:, numpy.newaxis]

        document_word_reconstruct = numpy.matmul(document_topic_mat,
                                                 topic_word_mat)

        lda_vocab = lda_vectorizer.get_feature_names()
        lda_words = [
            lda_vocab[x]
            for x in document_word_reconstruct.argsort()[0][::-1][:5]
        ]
        clip['lda_words'] = lda_words

    return {'video_id': video_id, 'clips': clips}
Example #20
def main(unused_argv):
    env = json.loads(os.environ.get("TF_CONFIG", "{}"))

    cluster_data = env.get("cluster", None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None

    task_data = env.get("task", None) or {"type": "master", "index": 0}
    task = type("TaskSpec", (object, ), task_data)
    is_master = (task.type == "master" and task.index == 0)
    train_dir = FLAGS.train_dir

    if cluster:
        logging.warning("%s: Starting trainer within cluster %s.",
                        task_as_string(task), cluster.as_dict())
        server = start_server(cluster, task)
        target = server.target
        device_fn = tf.train.replica_device_setter(
            ps_device="/job:ps",
            worker_device="/job:%s/task:%d" % (task.type, task.index),
            cluster=cluster)
    else:
        target = ""
        device_fn = ""

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    if is_master and FLAGS.start_new_model:
        remove_training_directory(train_dir, task)

    if not cluster or task.type == "master" or task.type == "worker":
        env = gym.make('CartPole-v0')
        model = find_class_by_name(FLAGS.model, [models])()

        batch_size = FLAGS.batch_size  # every how many episodes to do a param update?
        last_model_export_step = 0
        export_model_steps = FLAGS.export_model_steps

        with tf.Graph().as_default() as graph:
            meta_filename = get_meta_filename(FLAGS.start_new_model, train_dir,
                                              task)
            if meta_filename:
                logging.warning("using saved model %s", meta_filename)
                saver = recover_model(task, meta_filename)

            with tf.device(device_fn):
                if not meta_filename:
                    global_step = tf.Variable(0,
                                              trainable=False,
                                              name="global_step")
                    local_device_protos = device_lib.list_local_devices()
                    gpus = [
                        x.name for x in local_device_protos
                        if x.device_type == 'GPU'
                    ]
                    num_gpus = len(gpus)
                    if num_gpus > 0:
                        logging.warning("Using the following GPUs to train: " +
                                        str(gpus))
                        num_towers = num_gpus
                        device_string = '/gpu:%d'
                    else:
                        logging.warning("No GPUs found. Training on CPU.")
                        num_towers = 1
                        device_string = '/cpu:%d'

                    for i in range(num_towers):
                        with (tf.variable_scope(
                            ("tower"), reuse=True if i > 0 else None)):
                            with (slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                    device="/cpu:0"
                                    if num_gpus != 1 else "/gpu:0")):
                                results = model.build_graph(global_step)
                                model.add_to_collection(results)

                    model.collect()
                    tf.add_to_collection("global_step", global_step)
                    saver = tf.train.Saver(max_to_keep=0,
                                           keep_checkpoint_every_n_hours=0.25)

                init = tf.global_variables_initializer()
                global_step = tf.get_collection("global_step")[0]
                model.get_collection(global_step)

        sv = tf.train.Supervisor(graph,
                                 logdir=train_dir,
                                 init_op=init,
                                 is_chief=is_master,
                                 global_step=global_step,
                                 save_model_secs=3600,
                                 save_summaries_secs=120,
                                 saver=saver)

        # Launch the graph
        running_reward = None
        reward_sum = 0
        episode_number = 1
        total_episodes = FLAGS.total_episodes
        D = 4  # input dimensionality

        logging.warning("%s: Starting managed session.", task_as_string(task))
        with sv.managed_session(target, config=config) as sess:
            rendering = FLAGS.rendering
            # Obtain an initial observation of the environment.
            observation = env.reset()

            model.before(sess)

            while episode_number <= total_episodes:
                if rendering:
                    env.render()
                    time.sleep(1. / 24)

                # Make sure the observation is in a shape the network can handle.
                x = np.reshape(observation, [1, D])

                # Run the policy network and get an action to take.
                action = model.get_action(sess, x)

                # step the environment and get new measurements
                observation, reward, done, info = env.step(action)
                reward_sum += reward

                model.after_action(sess, reward, info)
                if done:
                    episode_number += 1

                    global_step_val = model.after_episode(sess)

                    # If we have completed enough episodes, then update the policy network with our gradients.
                    if episode_number % batch_size == 0:
                        model.after_batch(sess)

                        # Give a summary of how well our network is doing for each batch of episodes.
                        running_reward = reward_sum if running_reward is None else running_reward * 0.99 + reward_sum * 0.01
                        logging.info(
                            'Global step %d. Average reward for episode %f.  '
                            'Total average reward %f.', global_step_val,
                            reward_sum / batch_size,
                            running_reward / batch_size)
                        if reward_sum / batch_size > 200:
                            logging.info('Task solved in %d episodes!',
                                         episode_number)
                            break

                        reward_sum = 0

                    time_to_export = ((last_model_export_step == 0) or
                                      (global_step_val - last_model_export_step
                                       >= export_model_steps))

                    if is_master and time_to_export:
                        last_checkpoint = saver.save(sess, sv.save_path,
                                                     global_step_val)
                        last_model_export_step = global_step_val

                    observation = env.reset()

            if is_master:
                last_checkpoint = saver.save(sess, sv.save_path,
                                             global_step_val)
                last_model_export_step = global_step_val

            model.after()