Example #1
def stream_tfevents(path, file_api, run, step=0, namespace=""):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    global_step_key = namespaced_tag("global_step", namespace)
    try:
        for summary in tf.train.summary_iterator(path):
            parsed = tf_summary_to_dict(summary, namespace=namespace)
            if last_step != parsed[global_step_key]:
                last_step = parsed[global_step_key]
                if len(row) > 3:  # Must have more than _timestamp, _step, and global_step
                    step += 1
                    row["_step"] = step
                    last_row = history_dict_to_json(run, deepcopy(row))
                    file_api.push("wandb-history.jsonl", util.json_dumps_safer_history(last_row))
                    row = {}
            row.update(parsed)
    except tf.errors.DataLossError:
        wandb.termwarn("Found a truncated record in tfevents file, stopping parse")
    step += 1
    row["_step"] = step
    last_row = history_dict_to_json(run, deepcopy(row))
    file_api.push("wandb-history.jsonl", util.json_dumps_safer_history(last_row))
    return last_row
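The heavy lifting here is tf.train.summary_iterator, which yields Event protos from the tfevents file; tf_summary_to_dict then flattens each summary into a plain dict keyed by (namespaced) tag. As a rough, self-contained sketch of that underlying pattern without the wandb helpers, keeping only scalar values (read_scalars is a made-up name, and tf.compat.v1.train.summary_iterator is the TensorFlow 2.x spelling of the iterator used above):

import tensorflow as tf

def read_scalars(path):
    """Collect scalar summaries from a tfevents file, keyed by global step.

    Minimal sketch only: it ignores histograms, images, and the other
    summary types that tf_summary_to_dict handles above.
    """
    rows = {}
    for event in tf.compat.v1.train.summary_iterator(path):
        for value in event.summary.value:
            # simple_value is only set for scalar summaries
            if value.HasField("simple_value"):
                rows.setdefault(event.step, {})[value.tag] = value.simple_value
    return rows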
Example #2
 def _write(self):
     self._current_timestamp = self._current_timestamp or time.time()
     # Jupyter closes files between cell executions, so this reopens the file
     # for resuming.
     if self._file is None:
         self._file = open(self.fname, 'a')
     if self.row:
         self._lock.acquire()
         # Jupyter starts logging the first time wandb.log is called in a cell.
         # This will resume the run and potentially update self._steps
         self.ensure_jupyter_started()
         try:
             self.row['_runtime'] = self._current_timestamp - self._start_time
             self.row['_timestamp'] = self._current_timestamp
             self.row['_step'] = self._steps
             if self.stream_name != "default":
                 self.row["_stream"] = self.stream_name
             self._transform()
             self._file.write(util.json_dumps_safer_history(self.row))
             self._file.write('\n')
             self._file.flush()
             os.fsync(self._file.fileno())
             if self._add_callback:
                 self._add_callback(self.row)
             self._index(self.row)
             self.row = {}
         finally:
             self._lock.release()
         return True
     else:
         return False
Example #3
 def _write(self):
     self._current_timestamp = self._current_timestamp or time.time()
     # Saw a race in tests where history was closed while another log call came in,
     # so we check that self._file is set to make sure we don't bomb out.
     if self.row and self._file:
         self._lock.acquire()
         # Jupyter starts logging the first time wandb.log is called in a cell.
         # This will resume the run and potentially update self._steps
         self.ensure_jupyter_started()
         try:
             self.row['_runtime'] = self._current_timestamp - self._start_time
             self.row['_timestamp'] = self._current_timestamp
             self.row['_step'] = self._steps
             if self.stream_name != "default":
                 self.row["_stream"] = self.stream_name
             self._transform()
             self._file.write(util.json_dumps_safer_history(self.row))
             self._file.write('\n')
             self._file.flush()
             os.fsync(self._file.fileno())
             if self._add_callback:
                 self._add_callback(self.row)
             self._index(self.row)
             self.row = {}
         finally:
             self._lock.release()
         return True
     else:
         return False
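Examples #2 and #3 differ only in how they guard against a missing file handle; the core is the same: under a lock, stamp the row with _runtime, _timestamp, and _step, append it as one JSON line, flush, and fsync. A stripped-down sketch of that pattern, using plain json.dumps in place of util.json_dumps_safer_history and leaving out the Jupyter, stream, and callback handling (the JsonlHistory class and its step bookkeeping are illustrative, not wandb's actual implementation):

import json
import os
import threading
import time

class JsonlHistory:
    """Minimal append-only history writer; an illustration, not wandb's class."""

    def __init__(self, fname):
        self.fname = fname
        self._file = open(fname, "a")
        self._lock = threading.Lock()
        self._start_time = time.time()
        self._steps = 0
        self.row = {}

    def _write(self):
        if not self.row or self._file is None:
            return False
        with self._lock:
            now = time.time()
            self.row["_runtime"] = now - self._start_time
            self.row["_timestamp"] = now
            self.row["_step"] = self._steps
            self._file.write(json.dumps(self.row) + "\n")
            self._file.flush()
            os.fsync(self._file.fileno())  # make the row durable before moving on
            self.row = {}
            self._steps += 1  # step bookkeeping simplified for the sketch
        return True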
Example #4
 def publish_history(self, data, step=None, run=None):
     run = run or self._run
     data = data_types.history_dict_to_json(run, data, step=step)
     history = wandb_internal_pb2.HistoryRecord()
     for k, v in six.iteritems(data):
         item = history.item.add()
         item.key = k
         item.value_json = json_dumps_safer_history(v)
     self._publish_history(history)
Example #5
 def _save_row(self, row):
     data = {}
     for k, v in six.iteritems(row):
         if v is None:
             continue
         if isinstance(v, data_types.Histogram):
             v = v.to_json()
         elif isinstance(v, data_types.WBValue):
             # TODO(jhr): support more wandb data types
             continue
         data[k] = json.loads(json_dumps_safer_history(v))
     self._tbwatcher._sender._save_history(data)
Example #6
 def publish_history(self, data, step=None, run=None, publish_step=True):
     run = run or self._run
     data = data_types.history_dict_to_json(run, data, step=step)
     history = pb.HistoryRecord()
     if publish_step:
         assert step is not None
         history.step.num = step
     data.pop("_step", None)
     for k, v in six.iteritems(data):
         item = history.item.add()
         item.key = k
         item.value_json = json_dumps_safer_history(v)  # type: ignore
     self._publish_history(history)
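Compared to Example #4, this variant carries the step in a dedicated step.num field and strips _step from the items. Ignoring the protobuf types, the per-key encoding reduces to something like the sketch below (build_history_record is a made-up stand-in that returns a plain dict instead of a HistoryRecord, and json.dumps stands in for json_dumps_safer_history):

import json

def build_history_record(data, step=None, publish_step=True):
    """Plain-dict stand-in for filling a HistoryRecord; illustration only."""
    record = {"items": []}
    if publish_step:
        assert step is not None
        record["step"] = {"num": step}
    data = dict(data)
    data.pop("_step", None)  # the step travels in record["step"], not as an item
    for k, v in data.items():
        record["items"].append({"key": k, "value_json": json.dumps(v)})
    return record

# build_history_record({"loss": 0.25, "_step": 3}, step=3)
# -> {"items": [{"key": "loss", "value_json": "0.25"}], "step": {"num": 3}}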
Example #7
def stream_tfevents(path, file_api, step=0):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    for summary in tf.train.summary_iterator(path):
        parsed = tf_summary_to_dict(summary)
        if last_step != parsed["global_step"]:
            step += 1
            last_step = parsed["global_step"]
            # TODO: handle time
            if len(row) > 0:
                last_row = to_json(row)
                buffer.append(Chunk("wandb-history.jsonl",
                                    util.json_dumps_safer_history(to_json(row))))
        row.update(parsed)
    file_api._send(buffer)
    return last_row
Example #8
def stream_tfevents(path, file_api, step=0, namespace=""):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    global_step_key = namespaced_tag("global_step", namespace)
    for summary in tf.train.summary_iterator(path):
        parsed = tf_summary_to_dict(summary, namespace=namespace)
        if last_step != parsed[global_step_key]:
            step += 1
            row["_step"] = step
            last_step = parsed[global_step_key]
            # TODO: handle time
            if len(row) > 0:
                last_row = to_json(row)
                file_api.push("wandb-history.jsonl",
                              util.json_dumps_safer_history(to_json(row)))
        row.update(parsed)
    return last_row
Example #9
def stream_tfevents(path, file_api, run, step=0, namespace=""):
    """Parses and streams a tfevents file to the server"""
    last_step = 0
    row = {}
    buffer = []
    last_row = {}
    global_step_key = namespaced_tag("global_step", namespace)
    for summary in tf.train.summary_iterator(path):
        parsed = tf_summary_to_dict(summary, namespace=namespace)
        if last_step != parsed[global_step_key]:
            step += 1
            row["_step"] = step
            last_step = parsed[global_step_key]
            # TODO: handle time
            if len(row) > 0:
                last_row = history_dict_to_json(run, row)
                file_api.push("wandb-history.jsonl",
                              util.json_dumps_safer_history(last_row))
        row.update(parsed)
    # TODO: It's not clear why we still have wandb.data_types values in last_row
    # at this point, but we do, so we convert again.
    return history_dict_to_json(run, last_row)
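Examples #1, #7, #8, and #9 all implement the same flush-on-step-change loop: accumulate parsed summary values into a row until the global step advances, emit the finished row, then start a new one. Stripped of the wandb-specific helpers, the control flow is roughly the sketch below (flush_on_step_change, parsed_rows, and emit are illustrative names, not part of the library):

def flush_on_step_change(parsed_rows, emit):
    """Group per-event dicts into one row per global step; illustration only.

    parsed_rows: iterable of dicts, each carrying a "global_step" entry.
    emit: callable invoked with every completed row.
    """
    last_step = None
    row = {}
    for parsed in parsed_rows:
        step = parsed["global_step"]
        if last_step is not None and step != last_step and row:
            emit(row)  # the step advanced, so the accumulated row is complete
            row = {}
        last_step = step
        row.update(parsed)
    if row:
        emit(row)  # flush whatever accumulated after the last step change
    return row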