def test_linearize_metrics():
    entries = [
        ScalarMetricLogEntry("training.loss", 10, datetime.datetime.utcnow(),
                             100),
        ScalarMetricLogEntry("training.accuracy", 5,
                             datetime.datetime.utcnow(), 50),
        ScalarMetricLogEntry("training.loss", 20, datetime.datetime.utcnow(),
                             200),
        ScalarMetricLogEntry("training.accuracy", 10,
                             datetime.datetime.utcnow(), 100),
        ScalarMetricLogEntry("training.accuracy", 15,
                             datetime.datetime.utcnow(), 150),
        ScalarMetricLogEntry("training.accuracy", 30,
                             datetime.datetime.utcnow(), 300),
    ]
    linearized = linearize_metrics(entries)
    assert isinstance(linearized, dict)
    assert len(linearized) == 2
    assert "training.loss" in linearized
    assert "training.accuracy" in linearized
    assert len(linearized["training.loss"]["steps"]) == 2
    assert len(linearized["training.loss"]["values"]) == 2
    assert len(linearized["training.loss"]["timestamps"]) == 2
    assert len(linearized["training.accuracy"]["steps"]) == 4
    assert len(linearized["training.accuracy"]["values"]) == 4
    assert len(linearized["training.accuracy"]["timestamps"]) == 4
    assert linearized["training.accuracy"]["steps"] == [5, 10, 15, 30]
    assert linearized["training.accuracy"]["values"] == [50, 100, 150, 300]
    assert linearized["training.loss"]["steps"] == [10, 20]
    assert linearized["training.loss"]["values"] == [100, 200]
Example #2
    def _emit_heartbeat(self):
        beat_time = datetime.datetime.utcnow()
        self._get_captured_output()
        # Read all metrics measured since the last heartbeat
        logged_metrics = self._metrics.get_last_metrics()
        metrics_by_name = linearize_metrics(logged_metrics)
        for observer in self.observers:
            self._safe_call(observer, 'log_metrics',
                            metrics_by_name=metrics_by_name,
                            info=self.info)
            self._safe_call(observer, 'heartbeat_event',
                            info=self.info,
                            captured_out=self.captured_out,
                            beat_time=beat_time,
                            result=self.result)
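The heartbeat above fans out to whatever observers are attached to the run. As a rough sketch, a custom observer only needs to implement the two hooks invoked here; the method names and keyword arguments below mirror the _safe_call invocations above, assuming sacred's RunObserver base class:

from sacred.observers import RunObserver

class PrintMetricsObserver(RunObserver):
    # Toy observer: print each metric series received on heartbeat.
    def log_metrics(self, metrics_by_name, info):
        for name, series in metrics_by_name.items():
            print(name, series["steps"], series["values"])

    def heartbeat_event(self, info, captured_out, beat_time, result):
        print("heartbeat at", beat_time, "result:", result)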
Example #3
def test_log_metrics(dir_obs, sample_run, logged_metrics):
    """Test storing of scalar measurements.

    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the metrics.json file.

    Metrics are stored as a json with each metric indexed by a name 
    (e.g.: 'training.loss'). Each metric for the given name is then
    stored as three lists: iteration step(steps), the values logged(values)
    and the timestamp at which the measurement was taken(timestamps)
    """

    # Start the experiment
    basedir, obs = dir_obs
    sample_run["_id"] = None
    _id = obs.started_event(**sample_run)
    run_dir = basedir.join(str(_id))

    # Initialize the info dictionary and standard output with arbitrary values
    info = {"my_info": [1, 2, 3], "nr": 7}
    outp = "some output"

    obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1, result=0)

    assert run_dir.join("metrics.json").exists()
    metrics = json.loads(run_dir.join("metrics.json").read())

    # Confirm that only two metric names are registered
    # and that each has all the information we need.
    assert len(metrics) == 2
    assert "training.loss" in metrics
    assert "training.accuracy" in metrics
    for v in ["steps", "values", "timestamps"]:
        assert v in metrics["training.loss"]
        assert v in metrics["training.accuracy"]

    # Verify they have all the information
    # we logged in the right order.
    loss = metrics["training.loss"]
    assert loss["steps"] == [10, 20, 30]
    assert loss["values"] == [1, 2, 3]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    accuracy = metrics["training.accuracy"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Now process the remaining events.
    # The metrics shouldn't be overwritten; new values should be appended.
    obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    obs.heartbeat_event(info=info, captured_out=outp, beat_time=T2, result=0)

    # Reload the new metrics
    metrics = json.loads(run_dir.join("metrics.json").read())

    # The newly added metrics belong to the same run and have the same names,
    # so the total number of metrics should not change.
    assert len(metrics) == 2

    assert "training.loss" in metrics
    loss = metrics["training.loss"]
    assert loss["steps"] == [10, 20, 30, 40, 50, 60]
    assert loss["values"] == [1, 2, 3, 10, 20, 30]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    # Read the training.accuracy metric and verify it's unchanged
    assert "training.accuracy" in metrics
    accuracy = metrics["training.accuracy"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]
Example #4
def test_log_metrics(mongo_obs, sample_run, logged_metrics):
    """
    Test storing scalar measurements
    
    Test whether measurements logged using _run.metrics.log_scalar_metric
    are being stored in the 'metrics' collection
    and that the experiment 'info' dictionary contains a valid reference 
    to the metrics collection for each of the metric.
    
    Metrics are identified by name (e.g.: 'training.loss') and by the 
    experiment run that produced them. Each metric contains a list of x values
    (e.g. iteration step), y values (measured values) and timestamps of when 
    each of the measurements was taken.
    """

    # Start the experiment
    mongo_obs.started_event(**sample_run)

    # Initialize the info dictionary and standard output with arbitrary values
    info = {'my_info': [1, 2, 3], 'nr': 7}
    outp = 'some output'

    # Take the first 6 measured events, group them by metric name,
    # store the measured series in the 'metrics' collection,
    # and reference the newly created records in the 'info' dictionary.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:6]), info)
    # Call standard heartbeat event (store the info dictionary to the database)
    mongo_obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1,
                              result=0)

    # There should be only one run stored
    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    # ... and the info dictionary should contain a list of created metrics
    assert "metrics" in db_run['info']
    assert isinstance(db_run['info']["metrics"], list)

    # There should be two metrics stored in the metrics collection
    # (training.loss and training.accuracy).
    assert mongo_obs.metrics.count() == 2
    # Read the training.loss metric and make sure it references the correct run
    # and that the run (in the info dictionary) references the correct metric record.
    loss = mongo_obs.metrics.find_one({"name": "training.loss", "run_id": db_run['_id']})
    assert {"name": "training.loss", "id": str(loss["_id"])} in db_run['info']["metrics"]
    assert loss["steps"] == [10, 20, 30]
    assert loss["values"] == [1, 2, 3]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    # Read the training.accuracy metric and check the references as with the training.loss above
    accuracy = mongo_obs.metrics.find_one({"name": "training.accuracy", "run_id": db_run['_id']})
    assert {"name": "training.accuracy", "id": str(accuracy["_id"])} in db_run['info']["metrics"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Now process the remaining events.
    # The metrics shouldn't be overwritten; new values should be appended.
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[6:]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp, beat_time=T2,
                              result=0)

    assert mongo_obs.runs.count() == 1
    db_run = mongo_obs.runs.find_one()
    assert "metrics" in db_run['info']

    # The newly added metrics belong to the same run and have the same names, so the total number
    # of metrics should not change.
    assert mongo_obs.metrics.count() == 2
    loss = mongo_obs.metrics.find_one({"name": "training.loss", "run_id": db_run['_id']})
    assert {"name": "training.loss", "id": str(loss["_id"])} in db_run['info']["metrics"]
    # ... but the values should be appended to the original list
    assert loss["steps"] == [10, 20, 30, 40, 50, 60]
    assert loss["values"] == [1, 2, 3, 10, 20, 30]
    for i in range(len(loss["timestamps"]) - 1):
        assert loss["timestamps"][i] <= loss["timestamps"][i + 1]

    accuracy = mongo_obs.metrics.find_one({"name": "training.accuracy", "run_id": db_run['_id']})
    assert {"name": "training.accuracy", "id": str(accuracy["_id"])} in db_run['info']["metrics"]
    assert accuracy["steps"] == [10, 20, 30]
    assert accuracy["values"] == [100, 200, 300]

    # Make sure that when starting a new experiment, new records in metrics are created
    # instead of appending to the old ones.
    sample_run["_id"] = "NEWID"
    # Start the experiment
    mongo_obs.started_event(**sample_run)
    mongo_obs.log_metrics(linearize_metrics(logged_metrics[:4]), info)
    mongo_obs.heartbeat_event(info=info, captured_out=outp, beat_time=T1,
                              result=0)
    # A new run has been created
    assert mongo_obs.runs.count() == 2
    # Another 2 metrics have been created
    assert mongo_obs.metrics.count() == 4
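Putting the assertions above together, each record in the metrics collection has roughly the following shape. This is a sketch inferred from the test, not an authoritative schema; field types (such as the ObjectId _id) depend on the MongoDB driver:

# One document in the 'metrics' collection (sketch).
metric_doc = {
    "_id": "...",             # ObjectId assigned by MongoDB
    "name": "training.loss",
    "run_id": 1,              # _id of the run that produced the metric
    "steps": [10, 20, 30],
    "values": [1, 2, 3],
    "timestamps": ["..."],    # one datetime per step
}
# The run's info dict references it as:
# {"name": "training.loss", "id": str(metric_doc["_id"])}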
Example #5
    def export(self, observer, base_dir, remove_sources=False,
               overwrite=None):
        """
        Exports the file log into another observer.
        Requires sacred to be installed.
        Args:
            observer: Observer to export to
            base_dir: root path to sources
            remove_sources: if sources are too complicated to match
            overwrite: whether to overwrite an experiment
        """
        from sacred.metrics_logger import ScalarMetricLogEntry, linearize_metrics

        # Start simulation of run
        experiment = self.experiment.copy()
        experiment['base_dir'] = base_dir
        # FIXME
        if remove_sources:
            experiment['sources'] = []
        else:
            experiment['sources'] = update_source_path_prefix(
                base_dir, experiment['sources'])
        try:
            observer.started_event(
                experiment,
                self.command,
                self.host,
                datetime.datetime.fromisoformat(self.start_time),
                self.config,
                self.meta,
                _id=overwrite
            )
        except FileNotFoundError as e:
            raise FileNotFoundError(
                "The sources are incorrect. Try fixing the paths or use "
                f"`remove_sources=True`. Original error: {e}") from e

        # Add artifacts
        for artifact_name in self.artifacts:
            observer.artifact_event(
                name=artifact_name,
                filename=(self.path / artifact_name)
            )

        # Add resources
        for resource in self.resources:
            observer.resource_event(resource[0])

        # Add metrics
        size_metrics = {}
        # If overwriting, fetch the metrics that were already stored.
        # FIXME: breaks if steps are not monotonically increasing
        if overwrite is not None:
            metrics = observer.metrics.find({"run_id": overwrite})
            for metric in metrics:
                size_metrics[metric['name']] = len(metric['steps'])

        log_metrics = []
        for metric_name, metric in self.metrics.items():
            # Skip the entries that are already stored for this run.
            offset = size_metrics.get(metric_name, 0)
            steps = metric['steps'][offset:]
            timestamps = metric['timestamps'][offset:]
            values = metric['values'][offset:]
            for step, timestamp, value in zip(steps, timestamps, values):
                log_metrics.append(ScalarMetricLogEntry(
                    metric_name, step,
                    datetime.datetime.fromisoformat(timestamp), value))
        observer.log_metrics(linearize_metrics(log_metrics), {})

        observer.heartbeat_event(
            info=self.info if 'info' in self.run else None,
            captured_out=self.cout,
            beat_time=datetime.datetime.fromisoformat(self.heartbeat),
            result=self.result
        )

        # End simulation
        if self.status != "RUNNING":
            stop_time = datetime.datetime.fromisoformat(self.stop_time)

            if self.status == "COMPLETED":
                observer.completed_event(stop_time, self.result)
            elif self.status == "INTERRUPTED":
                observer.interrupted_event(stop_time, 'INTERRUPTED')
            elif self.status == "FAILED":
                observer.failed_event(stop_time, self.fail_trace)
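A hedged usage sketch for the export above: replaying a locally stored run into a MongoDB observer. Here run_log stands for an instance of the class that defines export(); the connection details are placeholders, and the MongoObserver constructor arguments assume a recent sacred version:

from sacred.observers import MongoObserver

observer = MongoObserver(url="mongodb://localhost:27017",
                         db_name="sacred")
run_log.export(observer, base_dir="/path/to/project",
               remove_sources=False)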