Ejemplo n.º 1
0
def safe_run(function,
             name=None,
             backoff=0.25,     # Backoff time increment
             backoff_max=60,   # Longest allowable backoff
             restart=True      # Call again if the function returns
             ):

    """
    Safely call a long-running lambda (usually a main program),
    catching and logging exceptions.

    The lambda is re-called immediately if it simply returns (and
    'restart' is true) or after a linearly-increasing backoff if it
    raises an exception.  The backoff always applies if the function
    has not yet returned successfully, and it resets to its initial
    value once a failed run lasts longer than the current backoff
    delay.

    Arguments:

    function - Lambda (or other plain Python function) to call with
        no arguments.
    name=s - Name handed to the logger.
    backoff=n - Initial backoff in seconds; also the amount added
        after each backed-off failure.
    backoff_max=n - Ceiling for the backoff, in seconds.
    restart - True to call the function again after it returns,
        False to return after the first successful call.

    Returns None once the function returns with restart disabled or
    a KeyboardInterrupt arrives.  Raises ValueError if 'function' is
    not a plain function.
    """

    if not isinstance(function, type(lambda: 0)):
        raise ValueError("Function provided is not a lambda.")

    log = pscheduler.Log(name=name,
                         prefix='safe_run',
                         signals=False,
                         quiet=True
                         )

    initial_backoff = backoff
    current_backoff = backoff
    runs = 0   # Number of times the function has returned normally

    while True:

        try:
            started = pscheduler.time_now()
            function()
            runs += 1
            if not restart:
                break

        except KeyboardInterrupt:
            # An interrupt is a request to stop, not a failure.
            break

        except Exception:
            ran = pscheduler.time_now() - started
            ran_seconds = pscheduler.timedelta_as_seconds(ran)

            log.error("Program threw an exception after %s", ran)
            log.exception()

            # Running longer than the backoff is a good excuse to try
            # starting over.
            if ran_seconds > current_backoff and runs != 0:
                current_backoff = initial_backoff
                log.error("Restarting immediately.")
            else:
                log.error("Waiting %s seconds before restarting", current_backoff)
                time.sleep(current_backoff)
                if current_backoff < backoff_max:
                    # Grow linearly but never past the ceiling.
                    current_backoff = min(current_backoff + initial_backoff,
                                          backoff_max)
                log.error("Restarting")
Ejemplo n.º 2
0
def safe_run(
    function,
    name=None,
    backoff=0.25,  # Backoff time increment
    backoff_max=60,  # Longest allowable backoff
    restart=True  # Call again if the function returns
):
    """
    Safely call a long-running lambda (usually a main program),
    catching and logging exceptions.

    The lambda is re-called immediately if it simply returns (and
    'restart' is true) or after a linearly-increasing backoff if it
    raises an exception.  The backoff always applies if the function
    has not yet returned successfully, and it resets to its initial
    value once a failed run lasts longer than the current backoff
    delay.

    Arguments:

    function - Lambda (or other plain Python function) to call with
        no arguments.
    name=s - Name handed to the logger.
    backoff=n - Initial backoff in seconds; also the amount added
        after each backed-off failure.
    backoff_max=n - Ceiling for the backoff, in seconds.
    restart - True to call the function again after it returns,
        False to return after the first successful call.

    Returns None once the function returns with restart disabled or
    a KeyboardInterrupt arrives.  Raises ValueError if 'function' is
    not a plain function.
    """

    if not isinstance(function, type(lambda: 0)):
        raise ValueError("Function provided is not a lambda.")

    log = pscheduler.Log(name=name,
                         prefix='safe_run',
                         signals=False,
                         quiet=True)

    initial_backoff = backoff
    current_backoff = backoff
    runs = 0  # Number of times the function has returned normally

    while True:

        try:
            started = pscheduler.time_now()
            function()
            runs += 1
            if not restart:
                break

        except KeyboardInterrupt:
            # An interrupt is a request to stop, not a failure.
            break

        except Exception:
            ran = pscheduler.time_now() - started
            ran_seconds = pscheduler.timedelta_as_seconds(ran)

            log.error("Program threw an exception after %s", ran)
            log.exception()

            # Running longer than the backoff is a good excuse to try
            # starting over.
            if ran_seconds > current_backoff and runs != 0:
                current_backoff = initial_backoff
                log.error("Restarting immediately.")
            else:
                log.error("Waiting %s seconds before restarting",
                          current_backoff)
                time.sleep(current_backoff)
                if current_backoff < backoff_max:
                    # Grow linearly but never past the ceiling.
                    current_backoff = min(
                        current_backoff + initial_backoff, backoff_max)
                log.error("Restarting")
Ejemplo n.º 3
0
def get_status():
    """
    Assemble a status report and return it through ok_json().

    The report is a dictionary with these keys:

    time - Current time as an ISO 8601 string.
    services - Result of the heartbeat_json query (empty if that query
        fails), plus a "database" entry holding the database server's
        uptime as an ISO 8601 duration when it can be determined.
    runs - "last-finished" and "last-scheduled" timestamps as ISO 8601
        strings, or None when the query fails or returns no rows.

    Every database lookup is best-effort: a failure in one section
    leaves that section empty or None rather than failing the request.
    """
    response = {}

    response["time"] = pscheduler.datetime_as_iso8601(pscheduler.time_now())

    # Get the heartbeat status
    try:
        services = dbcursor_query("SELECT * FROM heartbeat_json",
                                  onerow=True).fetchone()[0]
    except Exception:
        services = {}

    # Add the database status.  This goes into 'services' directly
    # because response["services"] does not exist until it is
    # assigned below.
    try:
        # query database, calculate server run time
        cursor = dbcursor_query(
            "SELECT extract(epoch from current_timestamp - pg_postmaster_start_time())",
            onerow=True)
        time_val = pscheduler.seconds_as_timedelta(cursor.fetchone()[0])
        services["database"] = {
            "uptime": str(pscheduler.timedelta_as_iso8601(time_val))
        }
    except Exception:
        # Best effort: report the other services without the uptime.
        pass

    response["services"] = services

    runs = {}
    # query database for last run information
    try:
        cursor = dbcursor_query(
            "SELECT times_actual FROM run WHERE state=run_state_finished()")
        # presumably times_actual is a range whose 'upper' attribute is
        # the end of the run -- TODO confirm against the schema
        latest = max(row[0].upper for row in cursor.fetchall())
        runs["last-finished"] = str(pscheduler.datetime_as_iso8601(latest))
    except Exception:
        # handles empty result and faulty query
        runs["last-finished"] = None

    # query database for last scheduled information
    try:
        cursor = dbcursor_query("SELECT added FROM run")
        latest = max(row[0] for row in cursor.fetchall())
        runs["last-scheduled"] = str(pscheduler.datetime_as_iso8601(latest))
    except Exception:
        # handles empty result and faulty query
        runs["last-scheduled"] = None

    response["runs"] = runs

    return ok_json(response)
Ejemplo n.º 4
0
def safe_run(
    function,
    name=None,
    backoff=0.25,  # Backoff time increment
    backoff_max=60.0,  # Longest allowable backoff
    restart=True  # Call again if the function returns
):
    """
    Safely call a long-running lambda (usually a main program),
    catching and logging exceptions, and restart by re-exec'ing the
    calling program.

    The lambda is called once.  If it returns and 'restart' is true,
    the calling program is re-exec'd immediately with its original
    arguments (sys.argv).  If it raises an exception, the program is
    re-exec'd after a linearly-increasing backoff.  The backoff always
    applies if the function has not yet returned successfully, and it
    resets to its initial value once a failed run lasts longer than
    the current backoff delay.

    Backoff state is carried across the exec as a pickled dictionary
    in the environment variable named by STATE_VARIABLE.

    Arguments:

    function - Lambda (or other plain Python function) to call with
        no arguments.
    name=s - Name handed to the logger.
    backoff=n - Initial backoff in seconds; also the amount added
        after each backed-off failure.  Ignored when state is
        inherited from the environment.
    backoff_max=n - Ceiling for the backoff, in seconds.
    restart - True to re-exec after the function returns normally.

    This function does not return: it exits with status 0 when no
    restart is called for (including after a KeyboardInterrupt), exits
    with status 1 if inherited state cannot be decoded, and otherwise
    replaces the process via os.execvp().  Raises ValueError if
    'function' is not a plain function.
    """

    if not isinstance(function, type(lambda: 0)):
        raise ValueError("Function provided is not a lambda.")

    log = pscheduler.Log(name=name,
                         prefix='safe_run',
                         signals=False,
                         quiet=True)

    # Inherit state from the environment

    if STATE_VARIABLE in os.environ:

        try:
            # NOTE(security): unpickling can execute arbitrary code, so
            # this trusts whoever controls the process environment.
            depickled = pickle.loads(os.environ[STATE_VARIABLE])

            initial_backoff = depickled['initial_backoff']
            current_backoff = depickled['current_backoff']
            runs = depickled['runs']

            # Explicit checks rather than assert so the validation
            # still happens when Python runs with -O.
            if not isinstance(initial_backoff, (int, float)):
                raise ValueError("initial_backoff is not a number")
            if not isinstance(current_backoff, (int, float)):
                raise ValueError("current_backoff is not a number")
            if not isinstance(runs, int):
                raise ValueError("runs is not an integer")

        except Exception as ex:
            log.error("Failed to decode %s '%s': %s" %
                      (STATE_VARIABLE, os.environ[STATE_VARIABLE], ex))
            sys.exit(1)
    else:

        initial_backoff = backoff
        current_backoff = backoff
        runs = 0

    # Run the function

    do_restart = False

    try:
        started = pscheduler.time_now()
        function()
        runs += 1
        do_restart = restart

    except KeyboardInterrupt:
        # An interrupt is a request to stop; leave do_restart False.
        pass

    except Exception:
        ran = pscheduler.time_now() - started
        ran_seconds = pscheduler.timedelta_as_seconds(ran)

        log.error("Program threw an exception after %s", ran)
        log.exception()

        # Running longer than the backoff is a good excuse to try
        # starting over.
        if ran_seconds > current_backoff and runs != 0:
            current_backoff = initial_backoff
        else:
            log.error("Waiting %s seconds before restarting", current_backoff)
            time.sleep(current_backoff)
            if current_backoff < backoff_max:
                # Grow linearly but never past the ceiling.
                current_backoff = min(current_backoff + initial_backoff,
                                      backoff_max)

        # Failures always restart, regardless of 'restart'.
        do_restart = True

    if not do_restart:
        log.error("Exiting")
        sys.exit(0)

    log.error("Restarting: %s", sys.argv)

    #
    # Pickle the current state to pass along
    #

    to_pickle = {
        'initial_backoff': initial_backoff,
        'current_backoff': current_backoff,
        'runs': runs
    }
    os.environ[STATE_VARIABLE] = pickle.dumps(to_pickle)

    os.execvp(sys.argv[0], sys.argv)
Ejemplo n.º 5
0
def run_program(
    argv,  # Program name and args
    stdin=None,  # What to send to stdin
    line_call=None,  # Lambda to call when a line arrives
    timeout=None,  # Seconds
    timeout_ok=False,  # Treat timeouts as not being an error
    fail_message=None,  # Exit with this failure message
    env=None,  # Environment for new process, None=existing
    env_add=None,  # Add hash to existing environment
    attempts=10):  # Max attempts to start the process
    """
    Run a program and return the results.

    Arguments:

    argv - Array containing arguments, including name of program
    stdin=s - String containing what should be sent to standard input
    line_call=l - Call lambda l with one argument containing a line which
        arrived on stdout each time that happens.  If provided, the
        'stdout' return value will be None.
    timeout=n - Wait n seconds for the program to finish, otherwise kill it.
    timeout_ok - True to prevent timeouts from being treated as errors.
    fail_message=s - Exit program and include string s if program fails.
    env=h - Pass environment hash 'h' to the child process, using the
        existing environment if the value is None.
    env_add=h - Add contents of hash 'h' to environment.
    attempts=n - Maximum number of tries at starting the process when
        the start fails with EAGAIN.


    Return Values:

    status - Status code returned by the program.  This is 2 if the
        program could not be run or timed out (0 for a timeout in
        single-shot mode when timeout_ok is set).
    stdout - Contents of standard output as a single string
    stderr - Contents of standard error as a single string

    Raises Exception if any element of argv is None and ValueError
    never escapes: other failures are reported through the return
    values with a traceback in stderr.
    """

    process = None

    if any(arg is None for arg in argv):
        raise Exception("Can't run with null arguments.")

    # Build up a new, incorruptable copy of the environment for the
    # child process to use.

    if env_add is None:
        env_add = {}

    if env is None and len(env_add) == 0:
        new_env = None
    else:
        new_env = (os.environ if env is None else env).copy()
        new_env.update(env_add)

    def __get_process(argv, new_env, attempts):
        """Try to start a process, handling EAGAINs."""
        while attempts > 0:
            attempts -= 1
            try:
                return _Popen(argv,
                              stdin=subprocess32.PIPE,
                              stdout=subprocess32.PIPE,
                              stderr=subprocess32.PIPE,
                              env=new_env)
            except OSError as ex:
                # Non-EAGAIN or last attempt gets re-raised.  A bare
                # raise keeps the original traceback intact.
                if ex.errno != errno.EAGAIN or attempts == 0:
                    raise
                # TODO: Should we sleep a bit here?

        assert False, "This code should not be reached."

    try:
        process = __get_process(argv, new_env, attempts)

        __running_add(process)

        if line_call is None:

            # Single-shot I/O with optional timeout

            try:
                stdout, stderr = process.communicate(stdin, timeout=timeout)
                status = process.returncode

            except subprocess32.TimeoutExpired:
                _end_process(process)
                status = 0 if timeout_ok else 2
                stdout = ''
                stderr = "Process took too long to run."

        else:

            # Read one line at a time, passing each to the line_call lambda

            if not isinstance(line_call, type(lambda: 0)):
                raise ValueError("Function provided is not a lambda.")

            if stdin is not None:
                process.stdin.write(stdin)
            process.stdin.close()

            stderr = ''

            stdout_fileno = process.stdout.fileno()
            stderr_fileno = process.stderr.fileno()

            fds = [stdout_fileno, stderr_fileno]

            if timeout is not None:
                end_time = pscheduler.time_now() \
                    + pscheduler.seconds_as_timedelta(timeout)
            else:
                time_left = None

            while True:

                if timeout is not None:
                    # Clamp at zero so a deadline that has already
                    # passed becomes a poll instead of a negative
                    # timeout.
                    time_left = max(0.0, pscheduler.timedelta_as_seconds(
                        end_time - pscheduler.time_now()))

                reads, _, _ = polled_select(fds, [], [], time_left)

                if len(reads) == 0:
                    # Nothing readable before the deadline.
                    # NOTE(review): this path ignores timeout_ok and
                    # fail_message, unlike single-shot mode -- confirm
                    # that is intended.
                    __running_drop(process)
                    _end_process(process)
                    return 2, None, "Process took too long to run."

                for readfd in reads:
                    if readfd == stdout_fileno:
                        got_line = process.stdout.readline()
                        if got_line != '':
                            line_call(got_line[:-1])
                    elif readfd == stderr_fileno:
                        got_line = process.stderr.readline()
                        if got_line != '':
                            stderr += got_line

                if process.poll() is not None:
                    break

            # Siphon off anything left on stdout
            while True:
                got_line = process.stdout.readline()
                if got_line == '':
                    break
                line_call(got_line[:-1])

            process.wait()

            status = process.returncode
            stdout = None

    except Exception as ex:
        # Report any failure through the return values, with the
        # exception summary followed by the full traceback.
        extype, _, trace = sys.exc_info()
        status = 2
        stdout = ''
        stderr = ''.join(traceback.format_exception_only(extype, ex)) \
            + ''.join(traceback.format_exception(extype, ex, trace)).strip()

    if process is not None:
        __running_drop(process)
        _end_process(process)

    if fail_message is not None and status != 0:
        pscheduler.fail("%s: %s" % (fail_message, stderr))

    return status, stdout, stderr
Ejemplo n.º 6
0
def run_program(argv,              # Program name and args
                stdin=None,        # What to send to stdin
                line_call=None,    # Lambda to call when a line arrives
                timeout=None,      # Seconds
                timeout_ok=False,  # Treat timeouts as not being an error
                short=False,       # True to force timeout to 2 seconds
                fail_message=None  # Exit with this failure message
    ):
    """
    Run a program and return the results.

    Arguments:

    argv - Array containing arguments, including name of program
    stdin=s - String containing what should be sent to standard input
    line_call=l - Call lambda l with one argument containing a line which
        arrived on stdout each time that happens.  If provided, the
        'stdout' return value will be None.
    timeout=n - Wait n seconds for the program to finish, otherwise kill it.
    timeout_ok - True to prevent timeouts from being treated as errors.
    short - True to force timeout to two seconds
    fail_message=s - Exit program and include string s if program fails.

    Return Values:

    status - Status code returned by the program.  This is 2 if the
        program could not be run or timed out (0 for a timeout in
        single-shot mode when timeout_ok is set).
    stdout - Contents of standard output as a single string
    stderr - Contents of standard error as a single string

    Raises Exception if any element of argv is None.  Other failures
    are reported through the return values with a traceback in stderr.
    """

    process = None

    # any() rather than filter(): a filter object is always true on
    # Python 3, which would reject every argv.
    if any(arg is None for arg in argv):
        raise Exception("Can't run with null arguments.")

    if short:
        timeout = 2

    try:
        process = subprocess32.Popen(argv,
                                     stdin=subprocess32.PIPE,
                                     stdout=subprocess32.PIPE,
                                     stderr=subprocess32.PIPE,
                                     )

        __running_add(process)

        if line_call is None:

            # Single-shot I/O with optional timeout

            try:
                stdout, stderr = process.communicate(stdin, timeout=timeout)
                status = process.returncode

            except subprocess32.TimeoutExpired:
                # Clean up after a timeout
                try:
                    process.kill()
                except OSError:
                    pass  # Can't kill things that change UID
                process.communicate()

                status = 0 if timeout_ok else 2

                # TODO: See if the exception has the contents of stdout and
                # stderr available.
                stdout = ''
                stderr = "Process took too long to run."

        else:

            # Read one line at a time, passing each to the line_call lambda

            if not isinstance(line_call, type(lambda: 0)):
                raise ValueError("Function provided is not a lambda.")

            if stdin is not None:
                process.stdin.write(stdin)
            process.stdin.close()

            stderr = ''

            stdout_fileno = process.stdout.fileno()
            stderr_fileno = process.stderr.fileno()

            fds = [stdout_fileno, stderr_fileno]

            # No deadline at all when no timeout was requested.
            if timeout is not None:
                end_time = pscheduler.time_now() \
                    + pscheduler.seconds_as_timedelta(timeout)

            while True:

                if timeout is None:
                    time_left = None
                else:
                    # Clamp at zero; select() rejects negative timeouts.
                    time_left = max(0.0, pscheduler.timedelta_as_seconds(
                        end_time - pscheduler.time_now()))

                reads, _, _ = select.select(fds, [], [], time_left)

                if len(reads) == 0:
                    # Timed out.  Don't leave the child running.
                    try:
                        process.kill()
                        process.wait()
                    except OSError:
                        pass  # Can't kill things that change UID
                    __running_drop(process)
                    return 2, None, "Process took too long to run."

                for fd in reads:
                    if fd == stdout_fileno:
                        line = process.stdout.readline()
                        if line:
                            line_call(line[:-1])
                    elif fd == stderr_fileno:
                        line = process.stderr.readline()
                        if line:
                            stderr += line

                if process.poll() is not None:
                    break

            # Deliver any lines still buffered on stdout when the
            # program exited so they aren't lost.
            while True:
                line = process.stdout.readline()
                if not line:
                    break
                line_call(line[:-1])

            process.wait()

            status = process.returncode
            stdout = None

    except Exception as ex:
        # Report any failure through the return values, with the
        # exception summary followed by the full traceback.
        extype, _, tb = sys.exc_info()
        status = 2
        stdout = ''
        stderr = ''.join(traceback.format_exception_only(extype, ex)) \
            + ''.join(traceback.format_exception(extype, ex, tb)).strip()


    if process is not None:
        __running_drop(process)

    if fail_message is not None and status != 0:
        pscheduler.fail("%s: %s" % (fail_message, stderr))

    return status, stdout, stderr