def test_linearize_metrics():
    """linearize_metrics groups scalar log entries by metric name.

    Each metric name must map to parallel 'steps', 'values' and
    'timestamps' lists, preserving the order in which the entries
    were logged.
    """
    entries = [
        ScalarMetricLogEntry("training.loss", 10, datetime.datetime.utcnow(), 100),
        ScalarMetricLogEntry("training.accuracy", 5, datetime.datetime.utcnow(), 50),
        ScalarMetricLogEntry("training.loss", 20, datetime.datetime.utcnow(), 200),
        ScalarMetricLogEntry("training.accuracy", 10, datetime.datetime.utcnow(), 100),
        ScalarMetricLogEntry("training.accuracy", 15, datetime.datetime.utcnow(), 150),
        ScalarMetricLogEntry("training.accuracy", 30, datetime.datetime.utcnow(), 300),
    ]
    linearized = linearize_metrics(entries)
    # isinstance is the idiomatic type check (also accepts dict subclasses).
    assert isinstance(linearized, dict)
    # Only the two metric names may appear as keys.
    assert len(linearized) == 2
    assert "training.loss" in linearized
    assert "training.accuracy" in linearized
    # The three parallel lists hold one element per logged entry.
    assert len(linearized["training.loss"]["steps"]) == 2
    assert len(linearized["training.loss"]["values"]) == 2
    assert len(linearized["training.loss"]["timestamps"]) == 2
    assert len(linearized["training.accuracy"]["steps"]) == 4
    assert len(linearized["training.accuracy"]["values"]) == 4
    assert len(linearized["training.accuracy"]["timestamps"]) == 4
    # Logging order must be preserved within each metric.
    assert linearized["training.accuracy"]["steps"] == [5, 10, 15, 30]
    assert linearized["training.accuracy"]["values"] == [50, 100, 150, 300]
    assert linearized["training.loss"]["steps"] == [10, 20]
    assert linearized["training.loss"]["values"] == [100, 200]
def _emit_heartbeat(self):
    """Flush captured output and newly logged metrics to all observers."""
    beat_time = datetime.datetime.utcnow()
    self._get_captured_output()
    # Gather every metric measured since the previous heartbeat and
    # group the entries by metric name before handing them off.
    metrics_by_name = linearize_metrics(self._metrics.get_last_metrics())
    for observer in self.observers:
        self._safe_call(observer, 'log_metrics',
                        metrics_by_name=metrics_by_name,
                        info=self.info)
        self._safe_call(observer, 'heartbeat_event',
                        info=self.info,
                        captured_out=self.captured_out,
                        beat_time=beat_time,
                        result=self.result)
def _emit_heartbeat(self):
    """Push captured stdout and freshly logged metrics to every observer."""
    beat_time = datetime.datetime.utcnow()
    self._get_captured_output()
    # Read all measured metrics since last heartbeat.
    logged = self._metrics.get_last_metrics()
    grouped = linearize_metrics(logged)
    for obs in self.observers:
        self._safe_call(
            obs, 'log_metrics', metrics_by_name=grouped, info=self.info)
        self._safe_call(
            obs, 'heartbeat_event', info=self.info,
            captured_out=self.captured_out, beat_time=beat_time,
            result=self.result)
def test_linearize_metrics():
    """linearize_metrics groups entries by name into parallel lists.

    Verifies key set, per-metric list lengths, and that logging order
    is preserved in 'steps' and 'values'.
    """
    entries = [
        ScalarMetricLogEntry("training.loss", 10, datetime.datetime.utcnow(), 100),
        ScalarMetricLogEntry("training.accuracy", 5, datetime.datetime.utcnow(), 50),
        ScalarMetricLogEntry("training.loss", 20, datetime.datetime.utcnow(), 200),
        ScalarMetricLogEntry("training.accuracy", 10, datetime.datetime.utcnow(), 100),
        ScalarMetricLogEntry("training.accuracy", 15, datetime.datetime.utcnow(), 150),
        ScalarMetricLogEntry("training.accuracy", 30, datetime.datetime.utcnow(), 300),
    ]
    linearized = linearize_metrics(entries)
    # Idiomatic type check instead of `type(...) == dict`.
    assert isinstance(linearized, dict)
    assert len(linearized) == 2
    # Each metric carries three parallel lists, one slot per entry.
    expected_lengths = {"training.loss": 2, "training.accuracy": 4}
    for name, count in expected_lengths.items():
        assert name in linearized
        for key in ("steps", "values", "timestamps"):
            assert len(linearized[name][key]) == count
    # Logging order must be preserved within each metric.
    assert linearized["training.accuracy"]["steps"] == [5, 10, 15, 30]
    assert linearized["training.accuracy"]["values"] == [50, 100, 150, 300]
    assert linearized["training.loss"]["steps"] == [10, 20]
    assert linearized["training.loss"]["values"] == [100, 200]
def test_log_metrics(dir_obs, sample_run, logged_metrics):
    """Test storing of scalar measurements.

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the metrics.json file.

    Metrics are stored as a json with each metric indexed by a name
    (e.g.: 'training.loss'). Each metric for the given name is then
    stored as three lists: iteration step(steps), the values logged(values)
    and the timestamp at which the measurement was taken(timestamps)
    """
    # Start the experiment.
    basedir, obs = dir_obs
    sample_run["_id"] = None
    _id = obs.started_event(**sample_run)
    run_dir = basedir.join(str(_id))

    # Arbitrary info dictionary and captured stdout for the heartbeat.
    info = {"my_info": [1, 2, 3], "nr": 7}
    outp = "some output"

    # Log the first six measurements, then flush via a heartbeat.
    obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1, result=0)

    metrics_file = run_dir.join("metrics.json")
    assert metrics_file.exists()
    metrics = json.loads(metrics_file.read())

    # Exactly two metric names registered, each with the full triple
    # of parallel lists.
    assert len(metrics) == 2
    for name in ("training.loss", "training.accuracy"):
        assert name in metrics
        for field in ("steps", "values", "timestamps"):
            assert field in metrics[name]

    # The logged series must come back complete and in order.
    loss = metrics["training.loss"]
    assert loss["steps"] == [10, 20, 30]
    assert loss["values"] == [1, 2, 3]
    for earlier, later in zip(loss["timestamps"], loss["timestamps"][1:]):
        assert earlier <= later

    accuracy = metrics["training.accuracy"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Process the remaining events: existing metrics must be appended
    # to, not overwritten.
    obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    obs.heartbeat_event(info=info, captured_out=outp, beat_time=T2, result=0)

    # Reload the new metrics.
    metrics = json.loads(run_dir.join("metrics.json").read())

    # Same run, same names: the number of metrics must not change.
    assert len(metrics) == 2
    assert "training.loss" in metrics
    loss = metrics["training.loss"]
    assert loss["steps"] == [10, 20, 30, 40, 50, 60]
    assert loss["values"] == [1, 2, 3, 10, 20, 30]
    for earlier, later in zip(loss["timestamps"], loss["timestamps"][1:]):
        assert earlier <= later

    # training.accuracy received no new entries and must be unchanged.
    assert "training.accuracy" in metrics
    accuracy = metrics["training.accuracy"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]
def test_log_metrics(mongo_obs, sample_run, logged_metrics):
    """Test storing scalar measurements.

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the 'metrics' collection and that the experiment
    'info' dictionary contains a valid reference to the metrics collection
    for each of the metric.

    Metrics are identified by name (e.g.: 'training.loss') and by the
    experiment run that produced them. Each metric contains a list of
    x values (e.g. iteration step), y values (measured values) and
    timestamps of when each of the measurements was taken.
    """
    # Start the experiment
    mongo_obs.started_event(**sample_run)

    # Initialize the info dictionary and standard output with arbitrary values
    info = {'my_info': [1, 2, 3], 'nr': 7}
    outp = 'some output'

    # Take first 6 measured events, group them by metric name
    # and store the measured series to the 'metrics' collection
    # and reference the newly created records in the 'info' dictionary.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    # Call standard heartbeat event (store the info dictionary to the database)
    mongo_obs.heartbeat_event(info=info, captured_out=outp,
                              beat_time=T1, result=0)

    # There should be only one run stored
    # NOTE(review): Collection.count() is deprecated in pymongo >= 3.7
    # (removed in 4.0); count_documents({}) is the replacement — left
    # unchanged here to match the fixture's API. TODO confirm.
    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    # ... and the info dictionary should contain a list of created metrics
    assert "metrics" in db_run['info']
    # isinstance is the idiomatic type check (also accepts list subclasses).
    assert isinstance(db_run['info']["metrics"], list)
    # The metrics, stored in the metrics collection,
    # should be two (training.loss and training.accuracy)
    assert mongo_obs.metrics.count() == 2

    # Read the training.loss metric and make sure it references the
    # correct run and that the run (in the info dictionary) references
    # the correct metric record.
    loss = mongo_obs.metrics.find_one(
        {"name": "training.loss", "run_id": db_run['_id']})
    assert {"name": "training.loss",
            "id": str(loss["_id"])} in db_run['info']["metrics"]
    assert loss["steps"] == [10, 20, 30]
    assert loss["values"] == [1, 2, 3]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    # Read the training.accuracy metric and check the references as with
    # the training.loss above
    accuracy = mongo_obs.metrics.find_one(
        {"name": "training.accuracy", "run_id": db_run['_id']})
    assert {"name": "training.accuracy",
            "id": str(accuracy["_id"])} in db_run['info']["metrics"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Now, process the remaining events
    # The metrics shouldn't be overwritten, but appended instead.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp,
                              beat_time=T2, result=0)

    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    assert "metrics" in db_run['info']
    # The newly added metrics belong to the same run and have the same
    # names, so the total number of metrics should not change.
    assert mongo_obs.metrics.count() == 2

    loss = mongo_obs.metrics.find_one(
        {"name": "training.loss", "run_id": db_run['_id']})
    assert {"name": "training.loss",
            "id": str(loss["_id"])} in db_run['info']["metrics"]
    # ... but the values should be appended to the original list
    assert loss["steps"] == [10, 20, 30, 40, 50, 60]
    assert loss["values"] == [1, 2, 3, 10, 20, 30]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    accuracy = mongo_obs.metrics.find_one(
        {"name": "training.accuracy", "run_id": db_run['_id']})
    assert {"name": "training.accuracy",
            "id": str(accuracy["_id"])} in db_run['info']["metrics"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Make sure that when starting a new experiment, new records in
    # metrics are created instead of appending to the old ones.
    sample_run["_id"] = "NEWID"
    # Start the experiment
    mongo_obs.started_event(**sample_run)
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:4]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp,
                              beat_time=T1, result=0)
    # A new run has been created
    assert mongo_obs.runs.count() == 2
    # Another 2 metrics have been created
    assert mongo_obs.metrics.count() == 4
def test_log_metrics(dir_obs, sample_run, logged_metrics):
    """Test storing of scalar measurements.

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the metrics.json file.

    Metrics are stored as a json with each metric indexed by a name
    (e.g.: 'training.loss'). Each metric for the given name is then
    stored as three lists: iteration step(steps), the values logged(values)
    and the timestamp at which the measurement was taken(timestamps)
    """
    # Start the experiment.
    basedir, obs = dir_obs
    sample_run['_id'] = None
    _id = obs.started_event(**sample_run)
    run_dir = basedir.join(str(_id))

    # Arbitrary info dictionary and captured stdout for the heartbeats.
    info = {'my_info': [1, 2, 3], 'nr': 7}
    outp = 'some output'

    # First batch of six measurements, flushed by a heartbeat.
    obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1, result=0)

    assert run_dir.join('metrics.json').exists()
    metrics = json.loads(run_dir.join('metrics.json').read())

    # Only two metric names may be registered, and both must carry the
    # complete triple of parallel lists.
    assert len(metrics) == 2
    assert "training.loss" in metrics
    assert "training.accuracy" in metrics
    for field in ("steps", "values", "timestamps"):
        assert field in metrics["training.loss"]
        assert field in metrics["training.accuracy"]

    # Everything logged must come back in order.
    loss = metrics["training.loss"]
    assert loss["steps"] == [10, 20, 30]
    assert loss["values"] == [1, 2, 3]
    for idx in range(1, len(loss["timestamps"])):
        assert loss["timestamps"][idx - 1] <= loss["timestamps"][idx]

    accuracy = metrics["training.accuracy"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Remaining events: existing metrics must be appended to, never
    # overwritten.
    obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    obs.heartbeat_event(info=info, captured_out=outp, beat_time=T2, result=0)

    # Reload the new metrics.
    metrics = json.loads(run_dir.join('metrics.json').read())

    # Same run, same metric names: the count must stay at two.
    assert len(metrics) == 2
    assert "training.loss" in metrics
    loss = metrics["training.loss"]
    assert loss["steps"] == [10, 20, 30, 40, 50, 60]
    assert loss["values"] == [1, 2, 3, 10, 20, 30]
    for idx in range(1, len(loss["timestamps"])):
        assert loss["timestamps"][idx - 1] <= loss["timestamps"][idx]

    # training.accuracy got no new entries and must be unchanged.
    assert "training.accuracy" in metrics
    accuracy = metrics["training.accuracy"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]
def test_log_metrics(mongo_obs, sample_run, logged_metrics):
    """Test storing scalar measurements.

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the 'metrics' collection and that the experiment
    'info' dictionary contains a valid reference to the metrics collection
    for each of the metric.

    Metrics are identified by name (e.g.: 'training.loss') and by the
    experiment run that produced them. Each metric contains a list of
    x values (e.g. iteration step), y values (measured values) and
    timestamps of when each of the measurements was taken.
    """
    # Start the experiment
    mongo_obs.started_event(**sample_run)

    # Initialize the info dictionary and standard output with arbitrary values
    info = {'my_info': [1, 2, 3], 'nr': 7}
    outp = 'some output'

    # Take first 6 measured events, group them by metric name
    # and store the measured series to the 'metrics' collection
    # and reference the newly created records in the 'info' dictionary.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    # Call standard heartbeat event (store the info dictionary to the database)
    mongo_obs.heartbeat_event(info=info, captured_out=outp,
                              beat_time=T1, result=0)

    # There should be only one run stored.
    # NOTE(review): Collection.count() is deprecated in pymongo >= 3.7;
    # count_documents({}) is the modern replacement — left as-is to match
    # the fixture's API. TODO confirm.
    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    # ... and the info dictionary should contain a list of created metrics
    assert "metrics" in db_run['info']
    # isinstance is the idiomatic type check (also accepts list subclasses).
    assert isinstance(db_run['info']["metrics"], list)
    # The metrics, stored in the metrics collection,
    # should be two (training.loss and training.accuracy)
    assert mongo_obs.metrics.count() == 2

    # Read the training.loss metric and make sure it references the
    # correct run and that the run (in the info dictionary) references
    # the correct metric record.
    loss = mongo_obs.metrics.find_one(
        {"name": "training.loss", "run_id": db_run['_id']})
    assert {"name": "training.loss",
            "id": str(loss["_id"])} in db_run['info']["metrics"]
    assert loss["steps"] == [10, 20, 30]
    assert loss["values"] == [1, 2, 3]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    # Read the training.accuracy metric and check the references as with
    # the training.loss above
    accuracy = mongo_obs.metrics.find_one(
        {"name": "training.accuracy", "run_id": db_run['_id']})
    assert {"name": "training.accuracy",
            "id": str(accuracy["_id"])} in db_run['info']["metrics"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Now, process the remaining events
    # The metrics shouldn't be overwritten, but appended instead.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp,
                              beat_time=T2, result=0)

    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    assert "metrics" in db_run['info']
    # The newly added metrics belong to the same run and have the same
    # names, so the total number of metrics should not change.
    assert mongo_obs.metrics.count() == 2

    loss = mongo_obs.metrics.find_one(
        {"name": "training.loss", "run_id": db_run['_id']})
    assert {"name": "training.loss",
            "id": str(loss["_id"])} in db_run['info']["metrics"]
    # ... but the values should be appended to the original list
    assert loss["steps"] == [10, 20, 30, 40, 50, 60]
    assert loss["values"] == [1, 2, 3, 10, 20, 30]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    accuracy = mongo_obs.metrics.find_one(
        {"name": "training.accuracy", "run_id": db_run['_id']})
    assert {"name": "training.accuracy",
            "id": str(accuracy["_id"])} in db_run['info']["metrics"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Make sure that when starting a new experiment, new records in
    # metrics are created instead of appending to the old ones.
    sample_run["_id"] = "NEWID"
    # Start the experiment
    mongo_obs.started_event(**sample_run)
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:4]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp,
                              beat_time=T1, result=0)
    # A new run has been created
    assert mongo_obs.runs.count() == 2
    # Another 2 metrics have been created
    assert mongo_obs.metrics.count() == 4
def export(self, observer, base_dir, remove_sources=False, overwrite=None):
    """Export the file log into another observer.

    Requires sacred to be installed.

    Replays the recorded run against `observer` as if it were happening
    live: started event, artifacts, resources, metrics, heartbeat, and
    the terminal status event.

    Args:
        observer: Observer to export to.
        base_dir: Root path to sources.
        remove_sources: Drop the source list entirely if the sources are
            too complicated to match against the new base_dir.
        overwrite: Run id of an existing experiment to overwrite, or
            None to create a fresh run.

    Raises:
        FileNotFoundError: If the source paths cannot be resolved by the
            target observer.
    """
    from sacred.metrics_logger import ScalarMetricLogEntry, linearize_metrics

    # Start simulation of run
    experiment = self.experiment.copy()
    experiment['base_dir'] = base_dir  # FIXME
    experiment['sources'] = [] if remove_sources else \
        update_source_path_prefix(base_dir, experiment['sources'])
    try:
        observer.started_event(
            experiment,
            self.command,
            self.host,
            datetime.datetime.fromisoformat(self.start_time),
            self.config,
            self.meta,
            _id=overwrite
        )
    except FileNotFoundError as e:
        # Chain the original exception so the real cause stays visible
        # in the traceback.
        raise FileNotFoundError(
            "The sources are incorrect. Try fixing paths or use `remove_sources=True`."
            f" Original error: {e}") from e

    # Add artifacts
    for artifact_name in self.artifacts:
        observer.artifact_event(
            name=artifact_name,
            filename=(self.path / artifact_name)
        )

    # Add resources
    for resource in self.resources:
        observer.resource_event(resource[0])

    # Add metrics.
    # If overwriting, remember how many entries each metric already has
    # on the target so only the new tail is replayed.
    # FIXME: issue if steps are not increasing
    size_metrics = {}
    if overwrite is not None:
        for metric in observer.metrics.find({"run_id": overwrite}):
            size_metrics[metric['name']] = len(metric['steps'])

    log_metrics = []
    for metric_name, metric in self.metrics.items():
        # Skip the prefix of entries that already exist on the target run
        # (0 when the metric is not present there).
        skip = size_metrics.get(metric_name, 0)
        steps = metric['steps'][skip:]
        timestamps = metric['timestamps'][skip:]
        values = metric['values'][skip:]
        for step, timestamp, value in zip(steps, timestamps, values):
            log_metrics.append(ScalarMetricLogEntry(
                metric_name, step,
                datetime.datetime.fromisoformat(timestamp), value))
    observer.log_metrics(linearize_metrics(log_metrics), {})

    observer.heartbeat_event(
        info=self.info if 'info' in self.run else None,
        captured_out=self.cout,
        beat_time=datetime.datetime.fromisoformat(self.heartbeat),
        result=self.result
    )

    # End simulation
    if self.status != "RUNNING":
        stop_time = datetime.datetime.fromisoformat(self.stop_time)
        # NOTE(review): "RUNNING" is unreachable inside this branch due
        # to the guard above; kept for symmetry with the original code.
        if self.status in ["COMPLETED", "RUNNING"]:
            # If still running we force it as a finished experiment
            observer.completed_event(stop_time, self.result)
        elif self.status == "INTERRUPTED":
            observer.interrupted_event(stop_time, 'INTERRUPTED')
        elif self.status == "FAILED":
            observer.failed_event(stop_time, self.fail_trace)