def safe_run(function,
             name=None,
             backoff=0.25,    # Backoff time increment
             backoff_max=60,  # Longest allowable backoff
             restart=True     # Call again if the function returns
             ):
    """
    Safely call a long-running lambda (usually a main program),
    catching and logging exceptions.

    The lambda will be re-called immediately if it simply returns or
    after a linearly-increasing backoff if it raises an exception.
    The backoff always applies if the function has not yet run
    successfully and time will reset once the lambda runs any longer
    than the last backoff delay.

    Arguments:
        function     -- Zero-argument lambda to run (must be a lambda).
        name         -- Name passed to the pscheduler logger.
        backoff      -- Seconds added to the delay after each failure.
        backoff_max  -- Cap on the delay between restarts.
        restart      -- If False, stop once the function returns cleanly.

    Raises:
        ValueError if 'function' is not a lambda.
    """

    if not isinstance(function, type(lambda: 0)):
        raise ValueError("Function provided is not a lambda.")

    log = pscheduler.Log(name=name, prefix='safe_run',
                         signals=False, quiet=True)

    initial_backoff = backoff
    current_backoff = backoff
    runs = 0

    while True:

        try:
            started = pscheduler.time_now()
            function()
            runs += 1
            if not restart:
                break
        except KeyboardInterrupt:
            # Operator asked to stop; don't treat it as a failure.
            break
        except Exception:
            ran = pscheduler.time_now() - started
            ran_seconds = pscheduler.timedelta_as_seconds(ran)
            log.error("Program threw an exception after %s", ran)
            log.exception()

            # Running longer than the backoff is a good excuse to try
            # starting over.
            if ran_seconds > current_backoff and runs != 0:
                # BUG FIX: this was 'currrent_backoff' (typo), which
                # created a dead local and never reset the backoff.
                current_backoff = initial_backoff
                log.error("Restarting immediately.")
            else:
                log.error("Waiting %s seconds before restarting",
                          current_backoff)
                time.sleep(current_backoff)
                if current_backoff < backoff_max:
                    current_backoff += initial_backoff

            log.error("Restarting")
import re import pscheduler import pprint logger = pscheduler.Log(quiet=True) # A whole bunch of pattern matching against the output of the "iperf" tool # client output. Builds up an object of interesting bits from it. def parse_output(lines): results = {} results['succeeded'] = True seen_header = False streams = {} dest_ip = None dest_port = None src_ip = None src_port = None for line in lines: # ignore bogus sessions if re.match('\(nan%\)', line): results["succeeded"] = False results["error"] = "Found NaN result" break if re.match('read failed: Connection refused', line): results["succeeded"] = False
def safe_run(function,
             name=None,
             backoff=0.25,      # Backoff time increment
             backoff_max=60.0,  # Longest allowable backoff
             restart=True       # Call again if the function returns
             ):
    """
    Safely call a long-running lambda (usually a main program),
    catching and logging exceptions.

    If an exception is thrown, the calling program will be re-exec'd
    using the same arguments immediately if it simply returns or after
    a linearly-increasing backoff if it raises an exception.  The
    backoff always applies if the function has not yet run
    successfully and time will reset once the lambda runs any longer
    than the last backoff delay.

    Backoff state is carried across the re-exec by pickling it into
    the STATE_VARIABLE environment variable.

    Raises:
        ValueError if 'function' is not a lambda.
    """

    if not isinstance(function, type(lambda: 0)):
        raise ValueError("Function provided is not a lambda.")

    log = pscheduler.Log(name=name, prefix='safe_run',
                         signals=False, quiet=True)

    # Inherit state from the environment left by a previous exec of
    # this program, validating each value before trusting it.
    if STATE_VARIABLE in os.environ:
        try:
            depickled = pickle.loads(os.environ[STATE_VARIABLE])
            initial_backoff = depickled['initial_backoff']
            assert type(initial_backoff) in [int, float]
            current_backoff = depickled['current_backoff']
            assert type(current_backoff) in [int, float]
            runs = depickled['runs']
            assert type(runs) == int
        except Exception as ex:
            log.error("Failed to decode %s '%s': %s"
                      % (STATE_VARIABLE, os.environ[STATE_VARIABLE], ex))
            exit(1)
    else:
        initial_backoff = backoff
        current_backoff = backoff
        runs = 0

    # Run the function once; a restart happens via exec below, not a loop.
    do_restart = False

    try:
        started = pscheduler.time_now()
        function()
        runs += 1
        do_restart = restart
    except KeyboardInterrupt:
        # Operator asked to stop; fall through and exit cleanly.
        pass
    except Exception:
        ran = pscheduler.time_now() - started
        ran_seconds = pscheduler.timedelta_as_seconds(ran)
        log.error("Program threw an exception after %s", ran)
        log.exception()

        # Running longer than the backoff is a good excuse to try
        # starting over.
        if ran_seconds > current_backoff and runs != 0:
            # BUG FIX: this was 'currrent_backoff' (typo), which
            # created a dead local and never reset the backoff.
            current_backoff = initial_backoff
        else:
            log.error("Waiting %s seconds before restarting",
                      current_backoff)
            time.sleep(current_backoff)
            if current_backoff < backoff_max:
                current_backoff += initial_backoff

        do_restart = True

    if not do_restart:
        log.error("Exiting")
        exit(0)

    log.error("Restarting: %s", sys.argv)

    #
    # Pickle the current state to pass along to the re-exec'd program.
    # NOTE(review): on Python 3, pickle.dumps() returns bytes while
    # os.environ values must be str; this code assumes Python 2 --
    # confirm before porting.
    #
    to_pickle = {
        'initial_backoff': initial_backoff,
        'current_backoff': current_backoff,
        'runs': runs
    }
    os.environ[STATE_VARIABLE] = pickle.dumps(to_pickle)

    os.execvp(sys.argv[0], sys.argv)
# # Logger Handle # import pscheduler import sys from pschedulerapiserver import application from flask import Response from flask import request from .args import * # This is thread-safe, so no need to do anything special with it. log = pscheduler.Log(name='pscheduler-api', signals=False) # Don't use anything out of .response in this because it uses the # logger. @application.route("/debug", methods=['PUT']) def debug(): if request.method == 'PUT': try: new_state = arg_boolean('state') except ValueError: return Response("Invalid state", status=500)
import datetime
import os
import re
import shutil
import sys
import time
import pytz
from subprocess import call

# Output constants
DELAY_BUCKET_DIGITS = 2       # Number of digits to round delay buckets
DELAY_BUCKET_FORMAT = '%.2f'  # Set buckets to nearest 2 decimal places
CLOCK_ERROR_DIGITS = 2        # Number of digits to round clock error

# Logger
# NOTE(review): 'pscheduler' (and 'ConfigParser'/'CONFIG_FILE' below) are
# not imported/defined in this chunk -- presumably provided elsewhere in
# the file; verify.
log = pscheduler.Log(prefix="tool-powstream", quiet=True)


##
# Open config file
def get_config():
    """
    Read and return the tool's configuration from CONFIG_FILE.

    Returns a ConfigParser instance, or None if the parser could not
    be created.  Failure to read the file is logged and tolerated so
    the caller can proceed with defaults (best-effort behavior).
    """
    config = None
    try:
        config = ConfigParser.ConfigParser()
        config.read(CONFIG_FILE)
    except Exception:
        # BUG FIX: the '%s' placeholder was never filled in; supply the
        # file name so the warning is actually useful.  Also narrowed
        # the bare 'except:' so SystemExit/KeyboardInterrupt pass through.
        log.warning(
            "Unable to read configuration file %s. Proceeding with defaults.",
            CONFIG_FILE)
    return config
### # Utilities for talking to esmond import pscheduler import urllib log = pscheduler.Log(prefix="archiver-esmond", quiet=True) #Number of seconds to wait if no bytes received on wire HTTP_TIMEOUT = 5 DEFAULT_SUMMARIES = { "throughput": [ { "summary-window": 86400, "event-type": "throughput", "summary-type": "average", }, ], "packet-loss-rate": [ { "summary-window": 300, "event-type": "packet-loss-rate", "summary-type": "aggregation", }, { "summary-window": 3600, "event-type": "packet-loss-rate", "summary-type": "aggregation", }, {
import re import pscheduler import pprint import json logger = pscheduler.Log(prefix='tool-bwctliperf3', quiet=True) # A whole bunch of pattern matching against the output of the "iperf3" tool # client output. Builds up an object of interesting bits from it. def parse_output(lines): results = {} results['succeeded'] = True try: content = json.loads("".join(lines)) except Exception as e: results['succeeded'] = False results['error'] = "Unable to parse iperf3 output as JSON: %s" % e return results intervals = [] if content.has_key('intervals'): intervals = content['intervals'] else: results['succeeded'] = False results['error'] = "iperf3 output is missing required field 'intervals'" return results final_streams = [] # Go through the JSON and convert to what we're expecting in throughput tests
### # utilities used by owping command # from owping_defaults import * import ConfigParser import pscheduler #Role constants CLIENT_ROLE = 0 SERVER_ROLE = 1 log = pscheduler.Log(prefix="tool-owping", quiet=True) ## # Determine whether particpant will act as client or server def get_role(participant, test_spec): # #Uncomment this when we do multi-participant # role = None # flip = test_spec.get('flip', False) # single_participant_mode = test_spec.get('single-participant-mode', False) # if participant == 0: # if single_participant_mode: # role = CLIENT_ROLE # elif flip: # role = SERVER_ROLE # else: # role = CLIENT_ROLE # elif participant == 1: # if flip: