Example #1
 def InitializeSubProcesses(self):
     """Initialize subprocess modules based on config.yaml."""
     if self.config.get('gps'):
         self.gps = gps_sensor.GPSProcess(self.config, self.point_queue)
         while self.point_queue.empty():
             self.point = gps_pb2.Point().FromString(self.point_queue.get())
             logging.log_every_n_seconds(
                 logging.INFO,
                 'Waiting for GPS fix to determine track before starting other '
                 'sensor subprocesses', 10)
             break
     self.ProcessSession()
     if self.config.get('accelerometer'):
         self.accel = accelerometer.AccelerometerProcess(
             self.config, self.point_queue)
     if self.config.get('gyroscope'):
         self.gyro = gyroscope.GyroscopeProcess(self.config,
                                                self.point_queue)
     if self.config.get('labjack'):
         self.labjack = labjack.Labjack(self.config, self.point_queue)
     if self.config.get('tire_temps'):
         self.tire_temps = tire_temperature.MultiTireInterface(
             self.config, self.point_queue)
     if self.config.get('wbo2'):
         self.wbo2 = wbo2.WBO2(self.config, self.point_queue)
     if self.config.get('timescale'):
         self.timescale = timescale.Timescale(
             timescale.CreateSession(self.session))
     if self.config.get('rtmp_overlay'):
         self.rtmp_overlay = rtmp_overlay.RTMPOverlay(self.config)
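All of the examples on this page exercise the same absl-py helper, logging.log_every_n_seconds(level, msg, n_seconds, *args), which emits a record for a given call site at most once every n_seconds and interpolates msg % args only when a record is actually emitted. A minimal self-contained sketch (the loop bound and sleep interval are arbitrary):

import time

from absl import logging

logging.set_verbosity(logging.INFO)
for i in range(50):
    # At most one record per 2-second window for this call site.
    logging.log_every_n_seconds(logging.INFO, 'processed %d items', 2, i)
    time.sleep(0.1)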
Example #2
 def ReadValues(self):
   """Reads the labjack voltages."""
   try:
     if self.config.get('labjack'):
       point = gps_pb2.Point()
       point.labjack_temp_f = (
           self.u3.getTemperature() * 9.0/5.0 - 459.67)
       for input_name, proto_field in self.config['labjack'].items():
         if input_name.startswith('ain') or input_name.startswith('fio'):
           channel = int(input_name[-1])
           # Note getFeedback(u3.AIN(4)) is reading voltage from FIO4.
           # Physically AIN4 and FIO4 are identical.  AIN is for analog input
           # and FIO is flexible input/output.
           feedback = self.u3.getFeedback(u3.AIN(channel))[0]
           is_low_voltage = channel not in self.HIGH_VOLTAGE_CHANNELS
           voltage = self.u3.binaryToCalibratedAnalogVoltage(
               feedback,
               isLowVoltage=is_low_voltage,
               channelNumber=channel)
            if input_name in self.config['labjack'].get('tick_divider_10', ()):
             voltage = voltage * 10
           setattr(point, proto_field, voltage)
       self.AddPointToQueue(point)
   except u3.LabJackException:
     stack_trace = ''.join(traceback.format_exception(*sys.exc_info()))
     logging.log_every_n_seconds(logging.ERROR,
                                 'Error reading labjack values\n%s',
                                 10,
                                 stack_trace)
Example #3
def afkmc2(data, k, m=20):
    """Implementation of Fast and Provably Good Seedings for k-Means https://las.inf.ethz.ch/files/bachem16fast.pdf """
    n = data.shape[0]
    c1 = np.random.randint(data.shape[0])
    c1_vec = np.expand_dims(data[c1], 0)
    q_nom = np.squeeze(euc_dist_batched(c1_vec, data))
    q_denom = np.sum(q_nom)
    q = 0.5 * q_nom / q_denom + 1.0 / (2.0 * n)
    indices = np.arange(n)
    c_i_minus_1 = np.zeros((k, data.shape[1]), dtype=np.float32)
    c_i_minus_1[0, :] = c1_vec
    for i in range(1, k):
        logging.log_every_n_seconds(logging.INFO, 'afkmc2 processed %s of %s', 5, i, k)
        x_ind = np.random.choice(indices, p=q)
        x = np.expand_dims(data[x_ind], 0)
        d_x = np.min(np.squeeze(euc_dist_batched(x, c_i_minus_1[:i])))
        for j in range(1, m):
            y_ind = np.random.choice(indices, p=q)
            y = np.expand_dims(data[y_ind], 0)
            d_y = np.min(np.squeeze(euc_dist_batched(y, c_i_minus_1[:i])))
            if ((d_y * q[x_ind]) / (d_x * q[y_ind])) > np.random.rand():
                x = y
                d_x = d_y
        c_i_minus_1[i] = x
    return c_i_minus_1
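A hypothetical usage sketch for afkmc2 above, assuming it lives in a module where numpy is imported as np and euc_dist_batched (defined in Example #10 below) is available:

import numpy as np

data = np.random.rand(1000, 8).astype(np.float32)  # 1000 points in 8 dimensions
seeds = afkmc2(data, k=10)  # (10, 8) array of seed centers for k-means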
Example #4
  def WaitUntilModelLoaded(self, deadline: float,
                           polling_interval_sec: int) -> None:
    """Wait until model is loaded and available.

    Args:
      deadline: A deadline time in UTC timestamp (in seconds).
      polling_interval_sec: GetServingStatus() polling interval.

    Raises:
      DeadlineExceeded: When deadline exceeded before model is ready.
      ValidationFailed: If validation failed explicitly.
    """
    while time.time() < deadline:
      status = self._GetServingStatus()
      if status == types.ModelServingStatus.NOT_READY:
        logging.log_every_n_seconds(
            level=logging.INFO,
            n_seconds=10,
            msg='Waiting for model to be loaded...')
        time.sleep(polling_interval_sec)
        continue
      elif status == types.ModelServingStatus.UNAVAILABLE:
        raise error_types.ValidationFailed(
            'Model server failed to load the model.')
      else:
        logging.info('Model is successfully loaded.')
        return

    raise error_types.DeadlineExceeded(
        'Deadline exceeded while waiting for the model to be loaded.')
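This example passes level, msg and n_seconds as keywords, while most of the others pass them positionally; both spellings resolve to the same absl signature, log_every_n_seconds(level, msg, n_seconds, *args). A sketch of the two equivalent forms:

logging.log_every_n_seconds(logging.INFO, 'Waiting for model to be loaded...', 10)
logging.log_every_n_seconds(
    level=logging.INFO, msg='Waiting for model to be loaded...', n_seconds=10)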
Example #5
 def __init__(self, entity_info_file, name, get_field, norm=None):
     logging.info('building entity kb...')
     with open(entity_info_file, 'rb') as f:
         [self.entity_ids, self.entity_names] = pickle.load(f)
     self.emap = dict()
     self.missing_entities = ['army', 'navy']
     if not os.path.exists(entity_info_file + '.cache.pkl'):
         for idx in range(len(self.entity_ids)):
             logging.log_first_n(logging.INFO, 'entity kb: %s -> %s', 10, self.entity_names[idx], idx)
             logging.log_every_n_seconds(logging.INFO, 'entity kb: %s of %s', 10, idx, len(self.entity_ids))
             self.emap[self.entity_names[idx].lower()] = idx
             normalized = normalize_name(self.entity_names[idx])
             splt = split(normalized)
             cleaned = clean(splt)
             nostop = remove_stopwords(cleaned)
             if normalized not in self.emap:
                 self.emap[normalized] = idx
             if splt not in self.emap:
                 self.emap[splt] = idx
             if cleaned not in self.emap:
                 self.emap[cleaned] = idx
             if nostop not in self.emap:
                 self.emap[nostop] = idx
         for me in self.missing_entities:
             self.emap[me] = len(self.emap)
         with open(entity_info_file + '.cache.pkl', 'wb') as fout:
             pickle.dump(self.emap, fout)
     else:
         with open(entity_info_file + '.cache.pkl', 'rb') as fin:
             self.emap = pickle.load(fin)
     self.name = name
     self.get_field = get_field
     logging.info('building entity kb...done')
Example #6
def _rmat_to_euler_xyz(rmat):
    """Converts a 3x3 rotation matrix to XYZ euler angles."""
    # | r00 r01 r02 |   |  cy*cz           -cy*sz            sy    |
    # | r10 r11 r12 | = |  cz*sx*sy+cx*sz   cx*cz-sx*sy*sz  -cy*sx |
    # | r20 r21 r22 |   | -cx*cz*sy+sx*sz   cz*sx+cx*sy*sz   cx*cy |
    if rmat[0, 2] > _POLE_LIMIT:
        logging.log_every_n_seconds(logging.WARNING, 'Angle at North Pole', 60)
        z = np.arctan2(rmat[1, 0], rmat[1, 1])
        y = np.pi / 2
        x = 0.0
        return np.array([x, y, z])

    if rmat[0, 2] < -_POLE_LIMIT:
        logging.log_every_n_seconds(logging.WARNING, 'Angle at South Pole', 60)
        z = np.arctan2(rmat[1, 0], rmat[1, 1])
        y = -np.pi / 2
        x = 0.0
        return np.array([x, y, z])

    z = -np.arctan2(rmat[0, 1], rmat[0, 0])
    y = np.arcsin(rmat[0, 2])
    x = -np.arctan2(rmat[1, 2], rmat[2, 2])

    # order of return is the order of input
    return np.array([x, y, z])
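A sanity-check sketch for the conversion above. It assumes the factorization shown in the comment, R = Rx(x) @ Ry(y) @ Rz(z), that it runs in the same module as _rmat_to_euler_xyz, and that _POLE_LIMIT is a constant slightly below 1 (its exact value is not shown here):

import numpy as np

def _rot_xyz(x, y, z):
    # Builds Rx(x) @ Ry(y) @ Rz(z), matching the matrix in the comment above.
    cx, sx = np.cos(x), np.sin(x)
    cy, sy = np.cos(y), np.sin(y)
    cz, sz = np.cos(z), np.sin(z)
    rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
    return rx @ ry @ rz

# Away from the y = +/-pi/2 poles the conversion inverts the factorization.
assert np.allclose(_rmat_to_euler_xyz(_rot_xyz(0.1, 0.2, 0.3)), [0.1, 0.2, 0.3])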
Example #7
 def Loop(self):
   with serial.Serial('/dev/ttyUSB0', 19200) as ser:
     for frame in ReadSerial(ser):
       logging.log_every_n_seconds(logging.DEBUG, 'WBO2 Frame: %s',
                                   10, frame)
       for frame_key, point_value in self.config['wbo2'].items():
         self.values[point_value].value = GetBytes(frame, frame_key)
Example #8
def batch_find_neighbors(X, nn: NearestNeighbors, batch_size=1000):
    res = np.zeros((X.shape[0], nn.n_neighbors - 1), dtype=np.int32)
    for i in range(0, X.shape[0], batch_size):
        logging.log_every_n_seconds(
            logging.INFO, 'Computed %s of %s neighbors' % (i, X.shape[0]), 5)
        res[i:(i + batch_size)] = nn.kneighbors(X[i:(i + batch_size), :],
                                                return_distance=False)[:, 1:]
    logging.info('Finished batch_find_neighbors')
    return res
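One nit on the call above: pre-formatting the message with % builds the string on every iteration, even when the record is rate-limited away. Passing the arguments through lets the logging machinery interpolate only when a record is actually emitted:

logging.log_every_n_seconds(
    logging.INFO, 'Computed %s of %s neighbors', 5, i, X.shape[0])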
Example #9
    def test_sac_algorithm_discrete(self, use_parallel_network):
        num_env = 1
        config = TrainerConfig(
            root_dir="dummy",
            unroll_length=1,
            mini_batch_length=2,
            mini_batch_size=64,
            initial_collect_steps=500,
            whole_replay_buffer_training=False,
            clear_replay_buffer=False,
            num_envs=num_env,
        )
        env_class = PolicyUnittestEnv

        steps_per_episode = 13
        env = env_class(num_env,
                        steps_per_episode,
                        action_type=ActionType.Discrete)

        eval_env = env_class(100,
                             steps_per_episode,
                             action_type=ActionType.Discrete)

        obs_spec = env._observation_spec
        action_spec = env._action_spec

        fc_layer_params = (10, 10)

        q_network = partial(QNetwork, fc_layer_params=fc_layer_params)

        alg2 = SacAlgorithm(observation_spec=obs_spec,
                            action_spec=action_spec,
                            q_network_cls=q_network,
                            use_parallel_network=use_parallel_network,
                            env=env,
                            config=config,
                            critic_optimizer=alf.optimizers.Adam(lr=1e-3),
                            alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
                            debug_summaries=False,
                            name="MySAC")

        eval_env.reset()
        for i in range(700):
            alg2.train_iter()
            if i < config.initial_collect_steps:
                continue
            eval_env.reset()
            eval_time_step = unroll(eval_env, alg2, steps_per_episode - 1)
            logging.log_every_n_seconds(
                logging.INFO,
                "%d reward=%f" % (i, float(eval_time_step.reward.mean())),
                n_seconds=1)

        self.assertAlmostEqual(1.0,
                               float(eval_time_step.reward.mean()),
                               delta=0.2)
Example #10
def euc_dist_batched(x_i, nodes, batch_size=1000):
    """Batched cdist operation."""
    dists = np.zeros((x_i.shape[0], nodes.shape[0]), np.float32)
    for i in range(0, x_i.shape[0], batch_size):
        logging.log_every_n_seconds(logging.INFO, 'euc_dist_batched processed %s of %s', 5, i, x_i.shape[0])
        for j in range(0, nodes.shape[0], batch_size):
            logging.log_every_n_seconds(logging.INFO, 'euc_dist_batched processed %s of %s', 5, j, nodes.shape[0])
            dists[i:(i + batch_size), j:(j + batch_size)] = squared_euclidean_cdist(
                x_i[i:(i + batch_size), :], nodes[j:(j + batch_size), :]).numpy()
    return dists
Example #11
 def p_par_to_batched_np(self, x_i, nodes, batch_size=1000):
     dists = np.zeros((x_i.shape[0], nodes.shape[0]), np.float32)
     for i in range(0, x_i.shape[0], batch_size):
         logging.log_every_n_seconds(
             logging.INFO, 'p_par_to_batched_np processed %s of %s', 5, i,
             x_i.shape[0])
         for j in range(0, nodes.shape[0], batch_size):
             dists[i:(i + batch_size),
                   j:(j + batch_size)] = self.p_par_to_broadcast(
                       x_i[i:(i + batch_size), :],
                       nodes[j:(j + batch_size), :]).numpy()
     return dists
Example #12
    def write_tsv(self,
                  filename,
                  leaves,
                  pids=None,
                  lbls=None,
                  update_cache=True):
        logging.info('Writing tree tsv to %s' % filename)
        logging.info('num leaves %s' % leaves.shape[0])
        logging.info('pids is None? %s' % (pids is None))
        logging.info('lbls is None? %s' % (lbls is None))
        internals = self.internals.numpy()
        leaf_to_par_assign = self.p_par_assign_to(leaves, internals)
        internal_to_par_assign = self.p_par_assign_to_internal(
            internals, internals, proj_child=False)
        self.cached_pairs = np.concatenate([
            np.expand_dims(np.arange(internal_to_par_assign.shape[0]), 1),
            np.expand_dims(internal_to_par_assign, 1)
        ],
                                           axis=1)
        self.cached_pairs = self.cached_pairs[self.cached_pairs[:, 1] != -1]
        with open(filename + '.internals', 'w') as fouti:
            with open(filename + '.leaves', 'w') as foutl:
                with open(filename, 'w') as fout:
                    i = -1
                    pid = 'int_%s' % i
                    best_pid = 'best_int_%s' % i
                    par_id = 'None'
                    fout.write('%s\t%s\tNone\n' % (pid, par_id))
                    fout.write('%s\t%s\tNone\n' % (best_pid, pid))

                    fouti.write('%s\t%s\tNone\n' % (pid, par_id))
                    fouti.write('%s\t%s\tNone\n' % (best_pid, pid))

                    for i in range(leaf_to_par_assign.shape[0]):
                        logging.log_every_n_seconds(logging.INFO,
                                                    'Wrote %s leaves' % i, 5)
                        pid = 'pt_%s' % i if pids is None else pids[i]
                        lbl = pid if lbls is None else lbls[i]
                        par_id = 'best_int_%s' % leaf_to_par_assign[i]
                        fout.write('%s\t%s\t%s\n' % (pid, par_id, lbl))
                        foutl.write('%s\t%s\t%s\n' % (pid, par_id, lbl))

                    for i in range(internal_to_par_assign.shape[0]):
                        logging.log_every_n_seconds(logging.INFO,
                                                    'Wrote %s internals' % i,
                                                    5)
                        pid = 'int_%s' % i
                        par_id = 'int_%s' % internal_to_par_assign[i]
                        best_pid = 'best_int_%s' % i
                        fout.write('%s\t%s\tNone\n' % (pid, par_id))
                        fout.write('%s\t%s\tNone\n' % (best_pid, par_id))
                        fouti.write('%s\t%s\tNone\n' % (pid, par_id))
                        fouti.write('%s\t%s\tNone\n' % (best_pid, par_id))
Example #13
def ReplayLog(filepath, include_sleep=False):
  """Replays data, extermely useful to LED testing.

  Args:
    filepath: A string of the path of lap data.
    include_sleep: If True replays adds sleeps to simulate how data was
                   processed in real time.

  Returns:
    A exit_speed_main.ExitSpeed instance that has replayed the given data.
  """
  logging.info('Replaying %s', filepath)
  logger = data_logger.Logger(filepath)
  points = list(logger.ReadProtos())
  logging.info('Number of points %d', len(points))
  if include_sleep:
    replay_start = time.time()
    time_shift = int(replay_start * 1e9 - points[0].time.ToNanoseconds())
    session_start = None
  else:
    FLAGS.set_default('commit_cycle', 10000)
  es = exit_speed_main.ExitSpeed(live_data=not include_sleep)
  es.point = points[0]
  es.point_queue.put(points[0].SerializeToString())
  es.config['car'] = os.path.split(os.path.dirname(filepath))[1]
  es.InitializeSubProcesses()
  es.AddNewLap()
  for point in points:
    if include_sleep:
      point.time.FromNanoseconds(point.time.ToNanoseconds() + time_shift)
      if not session_start:
        session_start = point.time.ToMilliseconds() / 1000

    es.point = point
    es.ProcessLap()
    if include_sleep:
      run_delta = time.time() - replay_start
      point_delta = point.time.ToMilliseconds() / 1000 - session_start
      if run_delta < point_delta:
        time.sleep(point_delta - run_delta)

  if not include_sleep:
    time.sleep(1)
    qsize = len(es.timescale.point_queue)
    while qsize > 0:
      qsize = len(es.timescale.point_queue)
      logging.log_every_n_seconds(logging.INFO, 'Queue size %s', 2, qsize)
    es.timescale.stop_process_signal.value = True
    print(time.time())
    es.timescale.process.join(10)
    print(time.time())
  return es
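A hypothetical invocation of the helper above; the path is purely illustrative and should point at a recorded lap data file:

# Path is hypothetical; the parent directory name is read as the car name.
es = ReplayLog('/tmp/lap_data/some_car/session.data', include_sleep=True)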
Example #14
def process_file(point2clusters, clusters, pkl_file):
    res = pickle.load(open(pkl_file, 'rb'))
    num_assign = 0
    for c, canopy_res in res.items():
        logging.log_first_n(logging.INFO, 'canopy %s', 5, c)
        for idx in range(len(canopy_res[0])):
            if canopy_res[1][idx] not in clusters:
                logging.log_every_n_seconds(logging.INFO, 'new cluster %s -> %s', 1, canopy_res[1][idx], len(clusters))
                clusters[canopy_res[1][idx]] = len(clusters)
            point2clusters[canopy_res[0][idx]].append(clusters[canopy_res[1][idx]])
            logging.log_every_n_seconds(logging.INFO, 'points %s -> %s', 1, canopy_res[0][idx], clusters[canopy_res[1][idx]])
            num_assign += 1
    return num_assign
Example #15
 def Run(self) -> None:
     """Runs exit speed in a loop."""
     self.InitializeSubProcesses()
     self.AddNewLap()
     while True:
         self.point = gps_pb2.Point().FromString(self.point_queue.get())
         self.ProcessLap()
         logging.log_every_n_seconds(
             logging.INFO, 'Main: Point queue size currently at %d.', 10,
             self.point_queue.qsize())
         self.sdnotify.notify('STATUS=Last report time:%s' %
                              self.point.time.ToJsonString())
         self.sdnotify.notify('WATCHDOG=1')
Example #16
    def _train(self):
        for env in self._envs:
            env.reset()
        time_step = self._driver.get_initial_time_step()
        policy_state = self._driver.get_initial_policy_state()
        iter_num = 0
        while True:
            t0 = time.time()
            with record_time("time/train_iter"):
                time_step, policy_state, train_steps = self._train_iter(
                    iter_num=iter_num,
                    policy_state=policy_state,
                    time_step=time_step)
            t = time.time() - t0
            logging.log_every_n_seconds(logging.INFO,
                                        '%s time=%.3f throughput=%0.2f' %
                                        (iter_num, t, int(train_steps) / t),
                                        n_seconds=1)
            if (iter_num + 1) % self._checkpoint_interval == 0:
                self._save_checkpoint()
            if self._evaluate and (iter_num + 1) % self._eval_interval == 0:
                self._eval()
            if iter_num == 0:
                # We need to wait for one iteration to get the operative args.
                # For now, just give a fixed gin file name to store the
                # operative args.
                common.write_gin_configs(self._root_dir, "configured.gin")
                with tf.summary.record_if(True):

                    def _markdownify(paragraph):
                        return "    ".join(
                            (os.linesep + paragraph).splitlines(keepends=True))

                    common.summarize_gin_config()
                    tf.summary.text('commandline', ' '.join(sys.argv))
                    tf.summary.text(
                        'optimizers',
                        _markdownify(self._algorithm.get_optimizer_info()))
                    tf.summary.text('revision', git_utils.get_revision())
                    tf.summary.text('diff', _markdownify(git_utils.get_diff()))
                    tf.summary.text('seed', str(self._random_seed))

            # check termination
            env_steps_metric = self._driver.get_step_metrics()[1]
            total_time_steps = env_steps_metric.result().numpy()
            iter_num += 1
            if (self._num_iterations and iter_num >= self._num_iterations) \
                or (self._num_env_steps and total_time_steps >= self._num_env_steps):
                break
Example #18
 def __log_status(block):
     is_root = isinstance(block, RootBlock)
     shard = "R" if is_root else block.header.branch.get_shard_id()
     count = len(block.minor_block_header_list) if is_root else len(
         block.tx_list)
     elapsed = time.time() - block.header.create_time
     GLOG.log_every_n_seconds(
         GLOG.INFO,
         "[{}] {} [{}] ({:.2f}) {}".format(
             shard,
             block.header.height,
             count,
             elapsed,
             block.header.get_hash().hex(),
         ),
         60,
     )
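GLOG here is whatever logger the surrounding module imports under that alias; it only needs to expose the same log_every_n_seconds(level, msg, n_seconds) API, so a plausible (but assumed) import would be:

from absl import logging as GLOG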
Example #19
    def process(self, element):
        """Transform text and labels to a machine learning ready form.

    Beam DoFn to generate a DataAndLabels of the features to be used in
    training.
    Yields key values tuples where the key is the label source.
    Args:
      element: Keyed APData

    Yields:
      key, value pair of the APData with the added features.
    """
        (key, ap_data) = element

        # Get Features:
        token_features = generate_token_features(ap_data.tokens, self._vocab)

        # Get labels:
        labels = generate_model_labels(ap_data.labeled_char_spans,
                                       ap_data.tokens)

        # Filter whitespaces:
        token_mask = [t.token_text == " " for t in ap_data.tokens]
        tokens = self.filter_whitespaces(ap_data.tokens, token_mask)
        self.seq_length_dist.update(len(tokens))

        if self._max_seq_length and len(tokens) > self._max_seq_length:
            self.over_max_seq_length.inc()
            logging.log_every_n_seconds(logging.INFO,
                                        "AP section too long: %d tokens", 3,
                                        len(tokens))
            return

        token_features = self.filter_whitespaces_dict(token_features,
                                                      token_mask)
        labels = self.filter_whitespaces_dict(labels, token_mask)

        # Pad.
        token_features = self.pad(token_features, value=0)
        labels = self.pad(labels, value=-1)

        ap_data.tokens = tokens
        ap_data.token_features = token_features
        ap_data.labels = labels

        yield (key, ap_data)
Example #20
 def ReadValues(self):
     """Reads the labjack voltages."""
     try:
         results = self.u3.getFeedback(*self.commands)
         for command in self.commands:
             result = results[self.commands.index(command)]
             voltage = self.u3.binaryToCalibratedAnalogVoltage(
                 result,
                 isLowVoltage=False,
                 channelNumber=command.positiveChannel)
             proto_field = self.command_proto_field[command]
             self.voltage_values[proto_field].value = voltage
     except u3.LabJackException:
         stack_trace = ''.join(traceback.format_exception(*sys.exc_info()))
         logging.log_every_n_seconds(logging.ERROR,
                                     'Error reading labjack values\n%s', 10,
                                     stack_trace)
Example #21
    def test_sarsa(self,
                   on_policy=False,
                   sac=True,
                   use_rnn=False,
                   priority_replay=False):
        logging.info("sac=%d on_policy=%s use_rnn=%s" %
                     (sac, on_policy, use_rnn))
        env_class = PolicyUnittestEnv
        iterations = 500
        num_env = 128
        if on_policy:
            num_env = 128
        steps_per_episode = 12
        env = env_class(num_env,
                        steps_per_episode,
                        action_type=ActionType.Continuous)
        eval_env = env_class(100,
                             steps_per_episode,
                             action_type=ActionType.Continuous)

        algorithm = _create_algorithm(env,
                                      on_policy=on_policy,
                                      sac=sac,
                                      use_rnn=use_rnn,
                                      priority_replay=priority_replay)

        env.reset()
        eval_env.reset()
        for i in range(iterations):
            algorithm.train_iter()

            eval_env.reset()
            eval_time_step = unroll(eval_env, algorithm, steps_per_episode - 1)
            logging.log_every_n_seconds(
                logging.INFO,
                "%d reward=%f" % (i, float(eval_time_step.reward.mean())),
                n_seconds=1)

        self.assertAlmostEqual(1.0,
                               float(eval_time_step.reward.mean()),
                               delta=0.3)
Example #22
    def _train(self):
        for env in self._envs:
            env.reset()
        time_step = self._driver.get_initial_time_step()
        policy_state = self._driver.get_initial_policy_state()
        iter_num = 0
        while True:
            t0 = time.time()
            time_step, policy_state, train_steps = self.train_iter(
                iter_num=iter_num,
                policy_state=policy_state,
                time_step=time_step)
            t = time.time() - t0
            logging.log_every_n_seconds(logging.INFO,
                                        '%s time=%.3f throughput=%0.2f' %
                                        (iter_num, t, int(train_steps) / t),
                                        n_seconds=1)
            tf.summary.scalar("time/train_iter", t)
            if (iter_num + 1) % self._checkpoint_interval == 0:
                self._save_checkpoint()
            if self._evaluate and (iter_num + 1) % self._eval_interval == 0:
                self._eval()
            if iter_num == 0:
                # We need to wait for one iteration to get the operative args.
                # For now, just give a fixed gin file name to store the
                # operative args.
                common.write_gin_configs(self._root_dir, "configured.gin")
                with tf.summary.record_if(True):
                    common.summarize_gin_config()
                    tf.summary.text('commandline', ' '.join(sys.argv))
                    tf.summary.text('optimizers',
                                    self._algorithm.get_optimizer_info())

            # check termination
            env_steps_metric = self._driver.get_step_metrics()[1]
            total_time_steps = env_steps_metric.result().numpy()
            iter_num += 1
            if (self._num_iterations and iter_num >= self._num_iterations) \
                or (self._num_env_steps and total_time_steps >= self._num_env_steps):
                break
Example #23
 def Do(self):
     """One iteration of the infinite loop."""
     lap = None
     lap_number_und_duration = None
     point_und_lap_number = None
     try:
         if not self.timescale_conn:
             self.timescale_conn = GetConnWithPointPrepare()
         lap = self.GetLapFromQueue()
         lap_number_und_duration = self.GetLapDurationFromQueue()
         point_und_lap_number = self.GetPointFromQueue()
         if point_und_lap_number:
             with self.timescale_conn.cursor() as cursor:
                 self.ExportSession(cursor)
                 if lap:
                     self.ExportLap(lap, cursor)
                 if lap_number_und_duration:
                     self.UpdateLapDuration(lap_number_und_duration[0],
                                            lap_number_und_duration[1],
                                            cursor)
                 self.ExportPoint(point_und_lap_number[0],
                                  point_und_lap_number[1], cursor)
                 self._Commit()
     except psycopg2.Error:
         stack_trace = ''.join(traceback.format_exception(*sys.exc_info()))
         logging.log_every_n_seconds(
             logging.ERROR, 'Error writing to timescale database\n%s', 10,
             stack_trace)
         # Repopulate queues on errors.
         if lap:
             self.lap_queue.put(lap)
         if lap_number_und_duration:
             self.lap_duration_queue.put(lap_number_und_duration)
         if point_und_lap_number:
             self.retry_point_queue.append(point_und_lap_number)
         self.timescale_conn = None  # Reset connection
Example #24
def render_image(state,
                 rays_dict,
                 model_fn,
                 device_count,
                 rng,
                 chunk=8192,
                 default_ret_key=None):
    """Render all the pixels of an image (in test mode).

    Args:
      state: model_utils.TrainState.
      rays_dict: dict, test example.
      model_fn: function, jit-ed render function.
      device_count: The number of devices to shard batches over.
      rng: The random number generator.
      chunk: int, the size of chunks to render sequentially.
      default_ret_key: either 'fine' or 'coarse'. If None, defaults to the
        finest level available.

    Returns:
      rgb: jnp.ndarray, rendered color image.
      depth: jnp.ndarray, rendered depth.
      acc: jnp.ndarray, rendered accumulated weights per pixel.
    """
    h, w = rays_dict['origins'].shape[:2]
    rays_dict = tree_util.tree_map(lambda x: x.reshape((h * w, -1)), rays_dict)
    num_rays = h * w
    _, key_0, key_1 = jax.random.split(rng, 3)
    key_0 = jax.random.split(key_0, device_count)
    key_1 = jax.random.split(key_1, device_count)
    host_id = jax.process_index()
    ret_maps = []
    start_time = time.time()
    num_batches = int(math.ceil(num_rays / chunk))
    for batch_idx in range(num_batches):
        ray_idx = batch_idx * chunk
        logging.log_every_n_seconds(logging.INFO,
                                    'Rendering batch %d/%d (%d/%d)', 2.0,
                                    batch_idx, num_batches, ray_idx, num_rays)
        # pylint: disable=cell-var-from-loop
        chunk_slice_fn = lambda x: x[ray_idx:ray_idx + chunk]
        chunk_rays_dict = tree_util.tree_map(chunk_slice_fn, rays_dict)
        num_chunk_rays = chunk_rays_dict['origins'].shape[0]
        remainder = num_chunk_rays % device_count
        if remainder != 0:
            padding = device_count - remainder
            # pylint: disable=cell-var-from-loop
            chunk_pad_fn = lambda x: jnp.pad(x, ((0, padding), (0, 0)),
                                             mode='edge')
            chunk_rays_dict = tree_util.tree_map(chunk_pad_fn, chunk_rays_dict)
        else:
            padding = 0
        # After padding the number of chunk_rays is always divisible by
        # host_count.
        per_host_rays = num_chunk_rays // jax.process_count()
        chunk_rays_dict = tree_util.tree_map(
            lambda x: x[(host_id * per_host_rays):
                        ((host_id + 1) * per_host_rays)], chunk_rays_dict)
        chunk_rays_dict = utils.shard(chunk_rays_dict, device_count)
        model_out = model_fn(key_0, key_1, state.optimizer.target['model'],
                             chunk_rays_dict, state.warp_extra)
        if not default_ret_key:
            ret_key = 'fine' if 'fine' in model_out else 'coarse'
        else:
            ret_key = default_ret_key
        ret_map = jax_utils.unreplicate(model_out[ret_key])
        ret_map = jax.tree_map(lambda x: utils.unshard(x, padding), ret_map)
        ret_maps.append(ret_map)
    ret_map = jax.tree_multimap(lambda *x: jnp.concatenate(x, axis=0),
                                *ret_maps)
    logging.info('Rendering took %.04s', time.time() - start_time)
    out = {}
    for key, value in ret_map.items():
        out[key] = value.reshape((h, w, *value.shape[1:]))

    return out
Example #25
    def test_sac_algorithm(self, use_parallel_network, reward_dim):
        num_env = 1
        config = TrainerConfig(
            root_dir="dummy",
            unroll_length=1,
            mini_batch_length=2,
            mini_batch_size=64,
            initial_collect_steps=500,
            whole_replay_buffer_training=False,
            clear_replay_buffer=False,
            num_envs=1,
        )
        env_class = PolicyUnittestEnv
        steps_per_episode = 13
        env = env_class(num_env,
                        steps_per_episode,
                        action_type=ActionType.Continuous,
                        reward_dim=reward_dim)

        eval_env = env_class(100,
                             steps_per_episode,
                             action_type=ActionType.Continuous,
                             reward_dim=reward_dim)

        obs_spec = env._observation_spec
        action_spec = env._action_spec

        fc_layer_params = (10, 10)

        continuous_projection_net_ctor = partial(
            alf.networks.NormalProjectionNetwork,
            state_dependent_std=True,
            scale_distribution=True,
            std_transform=clipped_exp)

        actor_network = partial(
            ActorDistributionNetwork,
            fc_layer_params=fc_layer_params,
            continuous_projection_net_ctor=continuous_projection_net_ctor)

        critic_network = partial(CriticNetwork,
                                 output_tensor_spec=env.reward_spec(),
                                 joint_fc_layer_params=fc_layer_params)

        alg = SacAlgorithm(observation_spec=obs_spec,
                           action_spec=action_spec,
                           actor_network_cls=actor_network,
                           critic_network_cls=critic_network,
                           use_parallel_network=use_parallel_network,
                           use_entropy_reward=reward_dim == 1,
                           env=env,
                           config=config,
                           actor_optimizer=alf.optimizers.Adam(lr=1e-2),
                           critic_optimizer=alf.optimizers.Adam(lr=1e-2),
                           alpha_optimizer=alf.optimizers.Adam(lr=1e-2),
                           debug_summaries=False,
                           name="MySAC")

        eval_env.reset()
        for i in range(700):
            alg.train_iter()
            if i < config.initial_collect_steps:
                continue
            eval_env.reset()
            eval_time_step = unroll(eval_env, alg, steps_per_episode - 1)
            logging.log_every_n_seconds(
                logging.INFO,
                "%d reward=%f" % (i, float(eval_time_step.reward.mean())),
                n_seconds=1)

        self.assertAlmostEqual(1.0,
                               float(eval_time_step.reward.mean()),
                               delta=0.3)
Example #26
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained,
    stats_graph_folder, epoch_number, parameters, dataset_filepaths):
    """
    Predict.
    """
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))

    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(stats_graph_folder, '{1:03d}_{0}.txt'.format(dataset_type,
        epoch_number))
    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', 'UTF-8')

    res = [None] * len(dataset.token_indices[dataset_type])

    def step(start, end):
        for i in range(start, end):
            token_indices_sequence = dataset.token_indices[dataset_type][i]
            feed_dict = {
              model.input_token_indices: token_indices_sequence,
              model.input_leading_spaces: dataset.leading_spaces[dataset_type][i],
              model.input_capitalization: model.InputCapitalization.create_columns(map(dataset.index_to_token.get, token_indices_sequence)),
              model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
              model.input_token_lengths: dataset.token_lengths[dataset_type][i],
              model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
              model.dropout_keep_prob: 1.,
              model.recall_inference_bias: parameters['recall_inference_bias'] if not parameters['train_model'] else 0.0,
            }
            res[i] = unary_scores, predictions = sess.run([model.unary_scores,
                model.predictions], feed_dict)
            logging.log_every_n_seconds(logging.INFO, 'Predict... run model [{dataset}] {percent}%'.format(dataset=dataset_type, percent=round(100*i/len(dataset.token_indices[dataset_type]), ndigits=1)), n_seconds=10)

    chunk_size = max(1, int(len(res) / parameters['number_of_cpu_threads_prediction']))
    threads = [threading.Thread(target=step, args=(i, min(len(res), i+chunk_size))) for i in range(0, len(res), chunk_size)]
    for t in threads:
        while True:
            try:
                t.start()
            except RuntimeError as e:
                logging.warning('RuntimeError: can\'t start new thread ######## threading.active_count=%d'% (threading.active_count(),) )
                time.sleep(3)
                continue
            else:
                break
    for t in threads:
        t.join()
        del t
    del threads

    for i in range(len(dataset.token_indices[dataset_type])):
        unary_scores, predictions = res[i]

        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores,
                transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert(len(predictions) == len(dataset.tokens[dataset_type][i]))

        output_string = ''
        prediction_labels = [dataset.index_to_label[prediction] for prediction in predictions]
        unary_score_list = unary_scores.tolist()[1:-1]

        gold_labels = dataset.labels[dataset_type][i]

        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)

        for prediction, token, gold_label, scores in zip(prediction_labels,
            dataset.tokens[dataset_type][i], gold_labels, unary_score_list):

            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')

                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 \
                or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]

                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()

                    gold_label_original = split_line[-1]

                    assert(token == token_original and gold_label == gold_label_original)
                    break

            split_line.append(prediction)
            try:
                if parameters['output_scores']:
                    # space separated scores
                    scores = ' '.join([str(i) for i in scores])
                    split_line.append('{}'.format(scores))
            except KeyError:
                pass

            output_string += ' '.join(split_line) + '\n'

        output_file.write(output_string+'\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])
        logging.log_every_n_seconds(logging.INFO, 'Predict... eval [{dataset}] {percent}%'.format(dataset=dataset_type, percent=round(100*i/len(dataset.token_indices[dataset_type]), ndigits=1)), n_seconds=10)

    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':

        if parameters['main_evaluation_mode'] == 'conll':

            # run perl evaluation script in python package
            # conll_evaluation_script = os.path.join('.', 'conlleval')
            package_name = 'neuroner'
            root_dir = os.path.dirname(pkg_resources.resource_filename(package_name,
                '__init__.py'))
            conll_evaluation_script = os.path.join(root_dir, 'conlleval')

            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script,
                output_filepath, conll_output_filepath)
            os.system(shell_command)

            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)

        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(all_predictions,
                all_y_true, dataset, parameters['main_evaluation_mode'])

            print(sklearn.metrics.classification_report(new_y_true, new_y_pred, 
                digits=4, labels=new_label_indices, target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
Example #27
    def _train(self):
        for env in self._envs:
            env.reset()
        if self._eval_env:
            self._eval_env.reset()

        begin_iter_num = int(self._trainer_progress._iter_num)
        iter_num = begin_iter_num

        checkpoint_interval = math.ceil(
            (self._num_iterations or self._num_env_steps) /
            self._num_checkpoints)

        if self._num_iterations:
            time_to_checkpoint = self._trainer_progress._iter_num + checkpoint_interval
        else:
            time_to_checkpoint = self._trainer_progress._env_steps + checkpoint_interval

        while True:
            t0 = time.time()
            with record_time("time/train_iter"):
                train_steps = self._algorithm.train_iter()
            t = time.time() - t0
            logging.log_every_n_seconds(
                logging.INFO,
                '%s -> %s: %s time=%.3f throughput=%0.2f' %
                (common.get_gin_file(), [
                    os.path.basename(self._root_dir.strip('/'))
                ], iter_num, t, int(train_steps) / t),
                n_seconds=1)

            if self._evaluate and (iter_num + 1) % self._eval_interval == 0:
                self._eval()
            if iter_num == begin_iter_num:
                self._summarize_training_setting()

            # check termination
            env_steps_metric = self._algorithm.get_step_metrics()[1]
            total_time_steps = env_steps_metric.result()
            iter_num += 1

            self._trainer_progress.update(iter_num, total_time_steps)

            if ((self._num_iterations and iter_num >= self._num_iterations)
                    or (self._num_env_steps
                        and total_time_steps >= self._num_env_steps)):
                # Evaluate before exiting so that the eval curve shown in TB
                # will align with the final iter/env_step.
                if self._evaluate:
                    self._eval()
                break

            if ((self._num_iterations and iter_num >= time_to_checkpoint)
                    or (self._num_env_steps
                        and total_time_steps >= time_to_checkpoint)):
                self._save_checkpoint()
                time_to_checkpoint += checkpoint_interval
            elif self._checkpoint_requested:
                logging.info("Saving checkpoint upon request...")
                self._save_checkpoint()
                self._checkpoint_requested = False

            if self._debug_requested:
                self._debug_requested = False
                import pdb
                pdb.set_trace()
Example #28
def main(argv):
    point2clusters = collections.defaultdict(list)
    cluster_dict = dict()
    logging.info('loading canopy results..')
    total_num_assignments = process(point2clusters, cluster_dict, FLAGS.input)
    logging.info('total_num_clusters %s', len(cluster_dict))
    logging.info('total_num_assignments %s', total_num_assignments)
    logging.info('loading canopy results...done')

    row = np.ones(total_num_assignments, np.int64)
    col = np.ones(total_num_assignments, np.int64)
    data = np.ones(total_num_assignments, np.int64)
    overall_idx = 0
    pid2idx = dict()
    for idx, (pid, clusters) in tqdm(enumerate(point2clusters.items()), 'building sparse graph'):
        row[overall_idx:overall_idx + len(clusters)] *= idx
        col[overall_idx:overall_idx + len(clusters)] = np.array(clusters, dtype=np.int64) + len(point2clusters)
        overall_idx += len(clusters)
        pid2idx[pid] = idx

    from scipy.sparse import coo_matrix
    mat = coo_matrix((data, (row, col)),
                     shape=(len(cluster_dict) + len(point2clusters), len(cluster_dict) + len(point2clusters)))
    from scipy.sparse.csgraph import connected_components
    logging.info('running cc...')
    n_cc, lbl_cc = connected_components(mat, directed=True, connection='weak')
    logging.info('running cc...done')

    logging.info('loading mentions...')
    with open(FLAGS.assignee_name_mentions, 'rb') as fin:
        assignee_mentions = pickle.load(fin)

    import uuid
    final_uuids = [str(uuid.uuid4()) for _ in range(n_cc)]
    mid2eid = dict()
    missing_mid2eid = dict()
    for amid, m in tqdm(assignee_mentions.items(), 'assigning ids'):
        if m.uuid in pid2idx:
            for rid in m.mention_ids:
                logging.log_every_n_seconds(logging.INFO,
                                            'mention: %s -> entity %s', 1,
                                            rid, final_uuids[lbl_cc[pid2idx[m.uuid]]])
                mid2eid[rid] = final_uuids[lbl_cc[pid2idx[m.uuid]]]
        else:
            logging.log_first_n(logging.INFO,
                                'we did not do any more disambiguation for %s', 100, m.uuid)
            for rid in m.mention_ids:
                missing_mid2eid[rid] = m.uuid
    logging.info('writing output ...')
    with open(FLAGS.output, 'w') as fout:
        for m, e in mid2eid.items():
            fout.write('%s\t%s\n' % (m, e))
        for m, e in missing_mid2eid.items():
            if m not in mid2eid:
                fout.write('%s\t%s\n' % (m, e))
    logging.info('writing output ... done.')
Example #29
def _test_do_logging():
    """Do some log operations."""
    logging.vlog(3, 'This line is VLOG level 3')
    logging.vlog(2, 'This line is VLOG level 2')
    logging.log(2, 'This line is log level 2')

    logging.vlog(1, 'This line is VLOG level 1')
    logging.log(1, 'This line is log level 1')
    logging.debug('This line is DEBUG')

    logging.vlog(0, 'This line is VLOG level 0')
    logging.log(0, 'This line is log level 0')
    logging.info('Interesting Stuff\0')
    logging.info('Interesting Stuff with Arguments: %d', 42)
    logging.info('%(a)s Stuff with %(b)s', {
        'a': 'Interesting',
        'b': 'Dictionary'
    })

    with mock.patch.object(timeit, 'default_timer') as mock_timer:
        mock_timer.return_value = 0
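        # With n_seconds=2 and the mocked timer advancing 0.2s per iteration,
        # records are emitted at t=0, 2, 4, 6 and 8 -- five in total.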
        while timeit.default_timer() < 9:
            logging.log_every_n_seconds(logging.INFO,
                                        'This should appear 5 times.', 2)
            mock_timer.return_value = mock_timer() + .2

    for i in range(1, 5):
        logging.log_first_n(logging.INFO, 'Info first %d of %d', 2, i, 2)
        logging.log_every_n(logging.INFO, 'Info %d (every %d)', 3, i, 3)

    logging.vlog(-1, 'This line is VLOG level -1')
    logging.log(-1, 'This line is log level -1')
    logging.warning('Worrying Stuff')
    for i in range(1, 5):
        logging.log_first_n(logging.WARNING, 'Warn first %d of %d', 2, i, 2)
        logging.log_every_n(logging.WARNING, 'Warn %d (every %d)', 3, i, 3)

    logging.vlog(-2, 'This line is VLOG level -2')
    logging.log(-2, 'This line is log level -2')
    try:
        raise OSError('Fake Error')
    except OSError:
        saved_exc_info = sys.exc_info()
        logging.exception('An Exception %s')
        logging.exception('Once more, %(reason)s', {'reason': 'just because'})
        logging.error('Exception 2 %s', exc_info=True)
        logging.error('Non-exception', exc_info=False)

    try:
        sys.exc_clear()
    except AttributeError:
        # No sys.exc_clear() in Python 3, but this will clear sys.exc_info() too.
        pass

    logging.error('Exception %s', '3', exc_info=saved_exc_info)
    logging.error('No traceback', exc_info=saved_exc_info[:2] + (None, ))

    logging.error('Alarming Stuff')
    for i in range(1, 5):
        logging.log_first_n(logging.ERROR, 'Error first %d of %d', 2, i, 2)
        logging.log_every_n(logging.ERROR, 'Error %d (every %d)', 3, i, 3)
    logging.flush()
Example #30
    def test_off_policy_algorithm(self, algorithm_ctor, use_rollout_state,
                                  sync_driver):
        logging.info("{} {}".format(algorithm_ctor.__name__, sync_driver))

        batch_size = 128
        if use_rollout_state:
            steps_per_episode = 5
            mini_batch_length = 8
            unroll_length = 8
            env_class = RNNPolicyUnittestEnv
        else:
            steps_per_episode = 12
            mini_batch_length = 2
            unroll_length = 12
            env_class = PolicyUnittestEnv
        env = TFPyEnvironment(
            env_class(
                batch_size,
                steps_per_episode,
                action_type=ActionType.Continuous))

        eval_env = TFPyEnvironment(
            env_class(
                batch_size,
                steps_per_episode,
                action_type=ActionType.Continuous))

        common.set_global_env(env)
        algorithm = algorithm_ctor()
        algorithm.set_summary_settings(summarize_grads_and_vars=True)
        algorithm.use_rollout_state = use_rollout_state

        if sync_driver:
            driver = SyncOffPolicyDriver(env, algorithm)
        else:
            driver = AsyncOffPolicyDriver([env],
                                          algorithm,
                                          num_actor_queues=1,
                                          unroll_length=unroll_length,
                                          learn_queue_cap=1,
                                          actor_queue_cap=1)
        eval_driver = OnPolicyDriver(eval_env, algorithm, training=False)

        eval_env.reset()
        driver.start()
        if sync_driver:
            time_step = driver.get_initial_time_step()
            policy_state = driver.get_initial_policy_state()
            for i in range(5):
                time_step, policy_state = driver.run(
                    max_num_steps=batch_size * steps_per_episode,
                    time_step=time_step,
                    policy_state=policy_state)

        for i in range(500):
            if sync_driver:
                time_step, policy_state = driver.run(
                    max_num_steps=batch_size * mini_batch_length * 2,
                    time_step=time_step,
                    policy_state=policy_state)
                whole_replay_buffer_training = False
                clear_replay_buffer = False
            else:
                driver.run_async()
                whole_replay_buffer_training = True
                clear_replay_buffer = True

            driver.algorithm.train(
                mini_batch_size=128,
                mini_batch_length=mini_batch_length,
                whole_replay_buffer_training=whole_replay_buffer_training,
                clear_replay_buffer=clear_replay_buffer)
            eval_env.reset()
            eval_time_step, _ = eval_driver.run(
                max_num_steps=(steps_per_episode - 1) * batch_size)
            logging.log_every_n_seconds(
                logging.INFO,
                "%d reward=%f" %
                (i, float(tf.reduce_mean(eval_time_step.reward))),
                n_seconds=1)
        driver.stop()

        self.assertAlmostEqual(
            1.0, float(tf.reduce_mean(eval_time_step.reward)), delta=2e-1)