Example #1
    def parse_file(self, directory):
        mls = []
        for f in self.parser.find_all_files(directory):
            if os.path.isdir(f):
                continue
            try:
                self.logger.info(f + " will be parsed.")
                mls.extend(self.parser.parse_summary(f, self.metrics))
            except Exception as e:
                self.logger.warning("Unexpected error: " + str(e))
                continue

        # Metrics logs must contain at least one objective metric value
        # Objective metric is located at first index
        is_objective_metric_reported = False
        for ml in mls:
            if ml.metric.name == self.metrics[0]:
                is_objective_metric_reported = True
                break
        # If objective metrics were not reported, insert unavailable value in the DB
        if not is_objective_metric_reported:
            mls = [
                api_pb2.MetricLog(time_stamp=rfc3339.rfc3339(datetime.now()),
                                  metric=api_pb2.Metric(
                                      name=self.metrics[0],
                                      value=const.UNAVAILABLE_METRIC_VALUE))
            ]
            self.logger.info(
                "Objective metric {} is not found in training logs, {} value is reported"
                .format(self.metrics[0], const.UNAVAILABLE_METRIC_VALUE))

        return api_pb2.ObservationLog(metric_logs=mls)
Example #2
    def parse_summary(self, tfefile):
        metric_logs = []
        # Load every tensor event recorded in the TF event file.
        event_accumulator = EventAccumulator(tfefile,
                                             size_guidance={'tensors': 0})
        event_accumulator.Reload()
        for tag in event_accumulator.Tags()['tensors']:
            for m in self.metric_names:
                # Metric names may carry a directory prefix (e.g. "train/loss");
                # in that case the event file's parent directory must match it.
                tfefile_parent_dir = (os.path.dirname(m)
                                      if len(m.split("/")) >= 2
                                      else os.path.dirname(tfefile))
                basedir_name = os.path.dirname(tfefile)
                if (not tag.startswith(m.split("/")[-1])
                        or not basedir_name.endswith(tfefile_parent_dir)):
                    continue

                # Emit one MetricLog entry per recorded tensor value.
                for wall_time, step, tensor in event_accumulator.Tensors(tag):
                    ml = api_pb2.MetricLog(
                        time_stamp=rfc3339.rfc3339(
                            datetime.fromtimestamp(wall_time)),
                        metric=api_pb2.Metric(
                            name=m, value=str(tf.make_ndarray(tensor))))
                    metric_logs.append(ml)

        return metric_logs
Example #3
    def parse_summary(self, tfefile, metrics):
        metric_logs = []
        # Walk every summary event in the TF event file.
        for summary in tf.train.summary_iterator(tfefile):
            paths = tfefile.split("/")
            for v in summary.summary.value:
                for m in metrics:
                    tag = str(v.tag)
                    # For metric names with a directory prefix (e.g. "train/loss"),
                    # prepend the event file's parent directory before matching.
                    if len(paths) >= 2 and len(m.split("/")) >= 2:
                        tag = str(paths[-2] + "/" + v.tag)
                    if tag.startswith(m):
                        ml = api_pb2.MetricLog(
                            time_stamp=rfc3339.rfc3339(
                                datetime.fromtimestamp(summary.wall_time)),
                            metric=api_pb2.Metric(
                                name=m, value=str(v.simple_value)))
                        metric_logs.append(ml)
        return metric_logs
def register_trial(stub):
    try:
        # type=1: minimize the objective metric ("loss"), with a goal value of 0.09.
        obj = api_pb2.ObjectiveSpec(type=1, goal=0.09, objective_metric_name="loss")
        parameters = api_pb2.TrialSpec.ParameterAssignments(
            assignments=[api_pb2.ParameterAssignment(name="rl", value="0.01")])
        spec = api_pb2.TrialSpec(experiment_name=TEST_EXPERIMENT,
                                 objective=obj,
                                 run_spec="a batch/job resource",
                                 metrics_collector_spec="metrics/collector",
                                 parameter_assignments=parameters)
        observation = api_pb2.Observation(
            metrics=[api_pb2.Metric(name="loss", value="0.54")])
        # condition=2 marks the trial as completed.
        status = api_pb2.TrialStatus(condition=2,
                                     observation=observation,
                                     start_time="2019-04-28T17:09:15Z",
                                     completion_time="2019-04-28T18:09:15Z")
        t = api_pb2.Trial(name=TEST_TRIAL, status=status, spec=spec)
        # The second positional argument is the RPC timeout in seconds.
        stub.RegisterTrial(api_pb2.RegisterTrialRequest(trial=t), 10)
        logger.info("Registered trial %s successfully" % TEST_TRIAL)
    except Exception:
        logger.error("Failed to register trial %s" % TEST_TRIAL, exc_info=True)
        raise
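
The register_trial helper above takes an already-constructed gRPC stub. Below is a minimal sketch of building one, assuming the gRPC bindings generated alongside api_pb2 expose a ManagerStub; the import path, stub class name, and the katib-manager:6789 address are assumptions for illustration, not values taken from the examples above.

import grpc

# Assumed gRPC companion module generated from the same api.proto as api_pb2;
# adjust the import path and stub class to match the actual generated code.
import api_pb2_grpc


def create_stub(address="katib-manager:6789"):
    # Open an insecure channel to the Katib manager and build the stub
    # that register_trial(stub) expects.
    channel = grpc.insecure_channel(address)
    return api_pb2_grpc.ManagerStub(channel)


# register_trial(create_stub())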