def test_logging_to_syslog():
    if platform.system() in ('Windows', 'Darwin'):
        return
    if not os.path.exists(LOCAL1_LOG):
        warnings.warn(f"{LOCAL1_LOG} does not exist; cannot test logging to local1")
        return
    clogging.setup(level=logging.INFO, syslog=True)
    nonce = str(time.time())
    logging.error("Logging at t={}.".format(nonce))

    # Wait a few milliseconds for the nonce to appear in the logfile
    time.sleep(.01)

    # Look for the nonce
    count = 0
    for line in open(LOCAL1_LOG):
        if nonce in line:
            sys.stdout.write(line)
            count += 1
    if count == 0:
        warnings.warn("local1 is not logging to /var/log/local1.log")
    assert count in [0, 1, 2]
    clogging.shutdown()
def square(x):
    """This is the map function. It runs on the executors.
    Log the hostname, the PID, and x as a JSON object."""
    from pyspark import SparkContext
    clogging.setup(level=logging.INFO, syslog=True)
    logging.info(json.dumps({'hostname': socket.gethostname(),
                             'pid': os.getpid(),
                             'x': x,
                             'func': 'square',
                             'applicationId': applicationId()}))
    return x * x
sc = cspark.spark_context(num_executors=int(num_executors),
                          pydirs=[os.path.dirname(__file__),
                                  os.path.join(os.path.dirname(__file__), 'ctools'),
                                  os.path.join(os.path.dirname(__file__), 'dfxml/python')])

###
### ENABLE LOGGING.
### Logging must be set up before any logging is done.
### Note that we log to both a file and to syslog facility 1.
###
clogging.setup(args.loglevel)
logging.info("START {} ".format(os.path.abspath(__file__)))

(mig, db) = process_setup_section(config, sc)
process_recodes_section(config, mig, db)

# Dump or print the schema, as necessary
if args.dump:
    db.dump()
if args.schema:
    print(db.sql_schema())

#if args.spark: mig.add_spark_to_environment()

# Run steps are implemented as compiled code.
mig.add_tables_to_environment(db)

# Add globals to the environment if we have any
if GLOBALS in config[RUN_SECTION]:
print("Running spark with 16 executors.... My PID is {}".format( os.getpid())) sc = cspark.spark_session( num_executors=16, pyfiles=[ os.path.join(os.path.dirname(os.path.abspath(__file__)), 'clogging.py') ]).sparkContext print("Spark Context Obtained. sc={} My PID is now {}".format( sc, os.getpid())) print("application id:", sc.applicationId) # Initialize logging on the head-end. # This is done after the Spark context is acquired, but it could be done before. clogging.setup(level=logging.INFO, syslog=True, filename='demo_logfile.log') # Count the squares of the numbers 1..1000 result = sc.parallelize(range(1, 1001)).map(square).reduce(myadder) print("The numbers 1..1000 square add to {}".format(result)) print( "Dumping the lines in the logfile that have my applicationId and collect all of the json objects:" ) objs = [] for line in open("/var/log/local1.log"): if sc.applicationId in line: print(line, end='') objs.append(json.loads(line[line.find('{'):]))
def main_setup(additional_args=[]):
    """Set up DAS system logging, parse the arguments, and load the configuration file,
    returning the args and config objects."""
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("config", help="Main Config File")
    parser.add_argument("--print_bom", help="Output a bill of materials", action='store_true')
    parser.add_argument("--make_release",
                        help="Create a zip file with all of the files necessary to run the DAS. Similar to print_bom")
    parser.add_argument("--experiment",
                        help="Run an experiment according to the [experiment] section, with the results in this directory")
    parser.add_argument("--isolation", help="Specifies isolation mode for experiments",
                        choices=['sameprocess', 'subprocess'], default='sameprocess')
    parser.add_argument("--graphdata",
                        help="Just draw the graph from the data that was already collected.",
                        action='store_true')
    parser.add_argument("--logfilename", help="Specify logfilename, otherwise auto-generate")
    parser.add_argument("--nohierconfig",
                        help="Use the regular Python configparser.ConfigParser instead of ctools.HierarchicalConfigParser",
                        action="store_true")
    parser.add_argument("--dump_config", help="Dump the config file, then exit", action='store_true')
    parser.add_argument("--get",
                        help="Output the section:option:default from the config file, then exit")
    parser.add_argument("--dry-run", help="Dry run; do not run the algorithm", action='store_true')
    for (args, kwargs) in additional_args:
        parser.add_argument(*args, **kwargs)
    clogging.add_argument(parser)
    args = parser.parse_args()

    if not os.path.exists(args.config):
        raise RuntimeError("{} does not exist".format(args.config))

    if args.graphdata and args.experiment is None:
        parser.error("--graphdata requires --experiment")

    ###
    ### Read the configuration file and handle config-related options
    ###
    config = ConfigParser() if args.nohierconfig else HierarchicalConfigParser()
    config.read(args.config)

    if args.dump_config:
        config.write(sys.stdout)
        exit(0)

    if args.get:
        if args.get.count(":") != 2:
            raise ValueError("Specify section:option:default as the --get argument")
        (section, option, default) = args.get.split(":")
        if (section in config) and (option in config[section]):
            print(config[section][option])
        else:
            print(default)
        exit(0)

    ###
    ### Logging must be set up before any logging is done.
    ### By default the logfile goes in the current directory, but if we run an experiment,
    ### put the logfile in that directory.
    ### Added an option to put logs in a subfolder specified in the config.
    ###
    if not args.logfilename:
        isodate = datetime.datetime.now().isoformat()[0:19]
        if (config.has_section(LOGGING_SECTION)
                and config.has_option(LOGGING_SECTION, LOGFOLDER_OPTION)
                and config.has_option(LOGGING_SECTION, LOGFILENAME_OPTION)):
            args.logfilename = (f"{config[LOGGING_SECTION][LOGFOLDER_OPTION]}/"
                                f"{config[LOGGING_SECTION][LOGFILENAME_OPTION]}-{isodate}-{os.getpid()}.log")
        else:
            args.logfilename = f"{isodate}-{os.getpid()}.log"

    # CB: Code needs to be removed.
    # Left here for backward compatibility, to be removed in future versions
    if args.experiment:
        if not os.path.exists(args.experiment):
            os.makedirs(args.experiment)
        if not os.path.isdir(args.experiment):
            raise RuntimeError("{} is not a directory".format(args.experiment))
        config[config.default_section][ROOT] = args.experiment
        args.logfilename = os.path.join(args.experiment, args.logfilename)
        if EXPERIMENT not in config:
            config.add_section(EXPERIMENT)
        config[EXPERIMENT][RUN_EXPERIMENT_FLAG] = "1"

    # If we are making the BOM, make a DAS object so the config file gets processed,
    # then make the BOM and exit.
    if args.print_bom:
        print_bom(config=config, args=args)
        exit(0)

    if args.make_release:
        make_release(config=config, zipfilename=args.make_release, args=args)
        print("Release: {}".format(args.make_release))
        exit(0)

    # Make sure the directory for the logfile exists. If not, make it.
    logdirname = os.path.dirname(args.logfilename)
    if logdirname and not os.path.exists(logdirname):
        os.mkdir(logdirname)

    clogging.setup(args.loglevel, syslog=False, filename=args.logfilename)
    logging.info("Config path: {}".format(os.path.abspath(args.config)))
    return args, config
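# A minimal usage sketch, not from the original source. The extra flag below is
# hypothetical; each additional_args entry is an (args, kwargs) pair that main_setup()
# forwards to parser.add_argument().
if __name__ == "__main__":
    args, config = main_setup(additional_args=[
        (("--validate_only",), {"help": "Parse the config file and exit", "action": "store_true"}),
    ])
    logging.info("Loaded config sections: %s", config.sections())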