Example #1
0
    def commit_component_run(
            self,
            component_run: ComponentRun,
            staleness_threshold: int = (60 * 60 * 24 * 30),
    ):
        """Validate a component run, flag stale dependencies, and persist it.

        Args:
            component_run: The run to store. Its ``check_completeness()``
                result must report success.
            staleness_threshold: Largest gap, in seconds, allowed between a
                dependency's start and this run's start before the
                dependency is flagged as stale. Defaults to 30 days.

        Raises:
            RuntimeError: If the completeness check does not succeed; the
                check's message becomes the error text.
        """
        completeness = component_run.check_completeness()
        if not completeness["success"]:
            raise RuntimeError(completeness["msg"])

        # A successful check may still carry an informational message.
        if completeness["msg"]:
            logging.info(completeness["msg"])

        seconds_per_day = 60 * 60 * 24
        for upstream in component_run.dependencies:
            # Staleness check 1: the dependency started too long before
            # this run did.
            elapsed = (component_run.start_timestamp -
                       upstream.start_timestamp).total_seconds()
            if elapsed > staleness_threshold:
                age_in_days = int(elapsed // seconds_per_day)
                component_run.add_staleness_message(
                    f"{upstream.component_name} (ID {upstream.id}) was run "
                    f"{age_in_days} days ago.")

            # Staleness check 2: the dependency's component has other runs
            # in the window between the dependency and this run.
            history = self.get_history(
                upstream.component_name,
                limit=None,
                date_lower=upstream.start_timestamp,
                date_upper=component_run.start_timestamp,
            )
            # The run being committed must not count against itself.
            other_runs = [
                past_run for past_run in history
                if component_run.id != past_run.id
            ]
            if len(other_runs) <= 1:
                continue

            # One entry is not counted as "fresher" -- presumably the
            # dependency's own run, which the window includes.
            newer_count = len(other_runs) - 1
            noun = "run" if newer_count == 1 else "runs"
            component_run.add_staleness_message(
                f"{upstream.component_name} (ID {upstream.id}) has "
                f"{newer_count} fresher {noun} that "
                "began before this component run started.")

        # Surface any accumulated staleness messages to the user.
        stale_messages = component_run.stale
        if len(stale_messages) > 0:
            logging.warning(stale_messages)

        # Remove duplicate labels on every input and output pointer.
        for input_pointer in component_run.inputs:
            input_pointer.dedup_labels()
        for output_pointer in component_run.outputs:
            output_pointer.dedup_labels()

        # Stage the run, announce the commit, then write to the database.
        self.session.add(component_run)
        commit_notice = (
            f"Committing ComponentRun {component_run.id} of type "
            f'"{component_run.component_name}" to the database.'
        )
        logging.info(commit_notice)
        self.session.commit()
Example #2
0
    def commit_component_run(
            self,
            component_run: ComponentRun,
            staleness_threshold: int = (60 * 60 * 24 * 30),
    ):
        """Commits a fully initialized component run to the DB.

        Before committing, each dependency of the run is checked for
        staleness and any findings are recorded on the run through
        ``add_staleness_message``.

        Args:
            component_run: The run to commit. Its ``check_completeness()``
                result must report success.
            staleness_threshold: Maximum number of seconds allowed between
                a dependency's start and this run's start before the
                dependency is reported as stale. Defaults to 30 days.

        Raises:
            RuntimeError: If ``check_completeness()`` reports failure; the
                accompanying message is used as the error text.
        """
        status_dict = component_run.check_completeness()
        if not status_dict["success"]:
            raise RuntimeError(status_dict["msg"])

        # A successful check may still carry an informational message.
        if status_dict["msg"]:
            logging.info(status_dict["msg"])

        # Check for staleness. https://github.com/loglabs/mltrace/issues/165#issue-891397631
        for dep in component_run.dependencies:
            # First case: the gap between the dependency and this run
            # exceeds the threshold (over a month by default).
            time_diff = (component_run.start_timestamp -
                         dep.start_timestamp).total_seconds()
            if time_diff > staleness_threshold:
                days_diff = int(time_diff // (60 * 60 * 24))
                component_run.add_staleness_message(
                    f"{dep.component_name} (ID {dep.id}) was run {days_diff} days ago."
                )

            # Second case: there is a newer run of the dependency.
            fresher_runs = self.get_history(
                dep.component_name,
                limit=None,
                date_lower=dep.start_timestamp,
                date_upper=component_run.start_timestamp,
            )
            # One result is not counted as "fresher" -- presumably the
            # dependency's own run, which the queried window includes.
            # Report only a positive count: an empty history previously
            # produced a bogus "-1 fresher run(s)" message.
            num_fresher = len(fresher_runs) - 1
            if num_fresher > 0:
                component_run.add_staleness_message(
                    f"{dep.component_name} (ID {dep.id}) has {num_fresher} "
                    "fresher run(s) that began before this component run "
                    "started."
                )

        # Commit to DB
        self.session.add(component_run)
        logging.info(
            f'Committing ComponentRun of type "{component_run.component_name}" to the database.'
        )
        self.session.commit()