def init_session(config, app=None, return_context=False, overrides=None, use_session=False):
    """Build a HOCON configuration and use it to start Spark via ``init_spark``.

    Args:
        config: One of
            * a ``str`` - treated as a file path when ``os.path.exists`` says
              it exists, otherwise parsed as HOCON text;
            * a ``dict`` - converted with ``ConfigFactory.from_dict``;
            * anything else - used as the configuration object unchanged.
        app: Forwarded untouched to ``init_spark``.
        return_context: When ``use_session`` is false, return the tuple
            ``(res, sqc)`` instead of only ``sqc``.
        overrides: Optional HOCON text; when given it is parsed and layered on
            top of ``config`` (``config`` acts as the fallback).
        use_session: Forwarded to ``init_spark``; when true, whatever
            ``init_spark`` returns is returned directly.

    Returns:
        The ``init_spark`` result when ``use_session`` is true; otherwise a
        ``HiveContext`` (master starts with ``"yarn"``) or ``SQLContext``
        built from that result, optionally paired with the result itself.
    """
    import os
    from pyhocon import ConfigFactory, ConfigParser

    # Normalise the input; string sources are parsed without resolving so that
    # substitutions can still be satisfied after overrides are layered on top.
    if isinstance(config, dict):
        tree = ConfigFactory.from_dict(config)
    elif isinstance(config, str):
        if os.path.exists(config):
            tree = ConfigFactory.parse_file(config, resolve=False)
        else:
            tree = ConfigFactory.parse_string(config, resolve=False)
    else:
        tree = config

    if overrides is None:
        conf = tree
    else:
        conf = ConfigFactory.parse_string(overrides).with_fallback(tree)
    # Resolve substitutions in place before the tree is handed to init_spark.
    ConfigParser.resolve_substitutions(conf)

    res = init_spark(conf, app, use_session)
    if use_session:
        return res

    if conf['spark-prop.spark.master'].startswith('yarn'):
        from pyspark.sql import HiveContext
        sqc = HiveContext(res)
        # Optional Hive settings are pushed one by one, always as strings.
        if 'hive-prop' in conf:
            for key, value in prop_list(conf['hive-prop']).items():
                sqc.setConf(key, str(value))
    else:
        from pyspark.sql import SQLContext
        sqc = SQLContext(res)

    return (res, sqc) if return_context else sqc
def test_self_merge_ref_substitutions_object(self):
    """``c.d`` must resolve to ``[1]`` after merging two unresolved trees.

    The first tree defines ``c`` as ``${a}`` extended with ``d : [ ${b} ]``;
    the second tree adds another key that also references ``${a}``.
    """
    # NOTE(review): the HOCON literals are reproduced exactly as they appear in
    # this view of the file; if they originally spanned several lines, the line
    # breaks are not visible here - confirm before relying on them.
    first_hocon = """ a : { } b : 1 c : ${a} { d : [ ${b} ] } """
    second_hocon = """ e : ${a} { } """

    first = ConfigFactory.parse_string(first_hocon, resolve=False)
    second = ConfigFactory.parse_string(second_hocon, resolve=False)

    # Substitutions are resolved only after both trees have been merged.
    resolved = ConfigParser.resolve_substitutions(ConfigTree.merge_configs(first, second))

    assert resolved.get("c.d") == [1]
def test_self_merge_ref_substitutions_object3(self):
    """A list referencing ``${b1}`` must see the value from the merged-in tree.

    The first tree defines ``b1`` and ``b = [${b1}]``; the second tree
    redefines ``b1``. After merge and resolution both ``b1`` and the single
    element of ``b`` are expected to equal the second definition.
    """
    # NOTE(review): the HOCON literals are reproduced exactly as they appear in
    # this view of the file; if they originally spanned several lines, the line
    # breaks are not visible here - confirm before relying on them.
    first_hocon = """ b1 : { v1: 1 } b = [${b1}] """
    second_hocon = """ b1 : { v1: 2, v2: 3 } """

    first = ConfigFactory.parse_string(first_hocon, resolve=False)
    second = ConfigFactory.parse_string(second_hocon, resolve=False)

    # Substitutions are resolved only after both trees have been merged.
    resolved = ConfigParser.resolve_substitutions(ConfigTree.merge_configs(first, second))

    assert resolved.get("b1") == {"v1": 2, "v2": 3}
    items = resolved.get("b")
    assert len(items) == 1
    assert items[0] == {"v1": 2, "v2": 3}
def test_self_merge_ref_substitutions_object2(self):
    """``b += [...]`` in the second tree must extend the list from the first.

    The first tree defines ``b = [${b1}]``; the second tree defines ``b2`` as
    ``${x}`` extended with ``{v2: 3}`` and appends ``${b2}`` to ``b``. The
    resolved list is expected to hold both objects, in that order.
    """
    # NOTE(review): the HOCON literals are reproduced exactly as they appear in
    # this view of the file; if they originally spanned several lines, the line
    # breaks are not visible here - confirm before relying on them.
    first_hocon = """ x : { v1: 1 } b1 : {v2: 2 } b = [${b1}] """
    second_hocon = """ b2 : ${x} {v2: 3} b += [${b2}] """

    first = ConfigFactory.parse_string(first_hocon, resolve=False)
    second = ConfigFactory.parse_string(second_hocon, resolve=False)

    # Substitutions are resolved only after both trees have been merged.
    resolved = ConfigParser.resolve_substitutions(ConfigTree.merge_configs(first, second))

    items = resolved.get("b")
    assert len(items) == 2
    assert items[0] == {'v2': 2}
    assert items[1] == {'v1': 1, 'v2': 3}
def test_self_merge_ref_substitutions_object(self):
    """Check that ``c.d`` resolves to ``[1]`` once two unresolved trees are merged.

    ``c`` in the base tree is ``${a}`` extended with ``d : [ ${b} ]``; the
    extra tree contributes a further key that references ``${a}``.
    """
    # NOTE(review): the HOCON literals are reproduced exactly as they appear in
    # this view of the file; if they originally spanned several lines, the line
    # breaks are not visible here - confirm before relying on them.
    base_tree = ConfigFactory.parse_string(
        """ a : { } b : 1 c : ${a} { d : [ ${b} ] } """,
        resolve=False,
    )
    extra_tree = ConfigFactory.parse_string(
        """ e : ${a} { } """,
        resolve=False,
    )

    # Merge first, resolve afterwards.
    combined = ConfigTree.merge_configs(base_tree, extra_tree)
    result = ConfigParser.resolve_substitutions(combined)

    assert result.get("c.d") == [1]
def test_self_merge_ref_substitutions_object3(self):
    """Check that ``[${b1}]`` picks up ``b1`` as redefined by the merged tree.

    The base tree declares ``b1`` and ``b = [${b1}]``; the extra tree
    declares ``b1`` again with different content. Both ``b1`` and the only
    element of ``b`` are expected to match the extra tree's definition.
    """
    # NOTE(review): the HOCON literals are reproduced exactly as they appear in
    # this view of the file; if they originally spanned several lines, the line
    # breaks are not visible here - confirm before relying on them.
    base_tree = ConfigFactory.parse_string(
        """ b1 : { v1: 1 } b = [${b1}] """,
        resolve=False,
    )
    extra_tree = ConfigFactory.parse_string(
        """ b1 : { v1: 2, v2: 3 } """,
        resolve=False,
    )

    # Merge first, resolve afterwards.
    combined = ConfigTree.merge_configs(base_tree, extra_tree)
    result = ConfigParser.resolve_substitutions(combined)

    assert result.get("b1") == {"v1": 2, "v2": 3}
    entries = result.get("b")
    assert len(entries) == 1
    assert entries[0] == {"v1": 2, "v2": 3}
def test_self_merge_ref_substitutions_object2(self):
    """Check that ``b += [${b2}]`` from the merged tree appends to ``b``.

    The base tree declares ``b = [${b1}]``; the extra tree declares ``b2``
    (``${x}`` extended with ``{v2: 3}``) and appends it to ``b``. The resolved
    list is expected to contain the two objects in declaration order.
    """
    # NOTE(review): the HOCON literals are reproduced exactly as they appear in
    # this view of the file; if they originally spanned several lines, the line
    # breaks are not visible here - confirm before relying on them.
    base_tree = ConfigFactory.parse_string(
        """ x : { v1: 1 } b1 : {v2: 2 } b = [${b1}] """,
        resolve=False,
    )
    extra_tree = ConfigFactory.parse_string(
        """ b2 : ${x} {v2: 3} b += [${b2}] """,
        resolve=False,
    )

    # Merge first, resolve afterwards.
    combined = ConfigTree.merge_configs(base_tree, extra_tree)
    result = ConfigParser.resolve_substitutions(combined)

    entries = result.get("b")
    assert len(entries) == 2
    assert entries[0] == {'v2': 2}
    assert entries[1] == {'v1': 1, 'v2': 3}
def load_config(cwd=None, debug=False):
    """
    Tries to find HOCON files named "iss4e.conf" using the paths returned by find_files().
    The found files are then parsed and merged together, so that a single configuration
    tree is returned.

    For details on HOCON syntax, see https://github.com/chimpler/pyhocon and
    https://github.com/typesafehub/config/

    Example configuration:
     - default config in home dir (~/iss4e.conf):
        datasources {
            influx {
                host = ${HOSTNAME}
                # also set your passwords (e.g. from env with ${MYSQL_PASSWD}) here
            }
            mysql {
                host = localhost
            }
        }

     - local config in cwd (./iss4e.conf):
        webike {
            # use the generic information from ${datasources.influx} (should be defined
            # in ~/iss4e.conf and contain host, password, ...) and extend it to use the
            # (non-generic) database "webike"
            influx = ${datasources.influx} {
                db = "webike"
            }
        }

     - merged config that will be returned:
        {
            "datasources": {
                "influx": {
                    "host": "SD959-LT"
                },
                "mysql": {
                    "host": "localhost"
                }
            },
            "webike": {
                "influx": {
                    "host": "SD959-LT",  # copied from ~/iss4e.conf: datasources.influx
                    "db": "webike"
                }
            }
        }

    In addition to the file contents, the tree contains the keys "__main__" (base name of
    the main module's file, or "__cli__" when it has none) and "__cwd__" (absolute form
    of `cwd`).

    Side effects:
     - if the merged config has a "logging" key, python logging is reconfigured from it via
       logging.config.dictConfig(); the top-level keys "capture_exceptions" and
       "capture_warnings" (both default True) control whether sys.excepthook is replaced
       by log_uncaught_exception and whether warnings are captured by logging
     - a warning is issued if the running interpreter is older than the top-level key
       "min_py_version" (default [3, 5])

    :param cwd: directory handed to find_files() as starting point; None (the default)
        means the current working directory *at call time*
    :param debug: if True, print the candidate files, the merged config and logging
        reconfiguration notices to stdout
    :return: the merged configuration with all substitutions resolved
    """
    # A default of `os.getcwd()` in the signature would be evaluated only once, when the
    # module is imported, and would ignore any later os.chdir(). Resolve it per call.
    if cwd is None:
        cwd = os.getcwd()

    # find "iss4e.conf" file in current working dir or parent directories
    # (materialised as a list because it is iterated twice when debug is on)
    files = list(find_files("iss4e.conf", cwd))
    configs = [
        ConfigFactory.parse_file(file, required=False, resolve=False)
        for file in files
        if os.path.isfile(file)
    ]
    if debug:
        print("Config files:\n" + "\n".join(
            file + " [" + ("not " if not os.path.isfile(file) else "") + "found]"
            for file in files))

    # merge all levels of config
    config = ConfigTree(root=True)
    config.put(
        "__main__",
        os.path.basename(getattr(sys.modules['__main__'], "__file__", "__cli__")))
    config.put("__cwd__", os.path.abspath(cwd))
    # NOTE(review): each file is the first argument and the accumulated tree the second;
    # presumably this lets entries merged earlier take precedence - confirm against
    # pyhocon's ConfigTree.merge_configs.
    for c in configs:
        config = ConfigTree.merge_configs(c, config)
    # files were parsed with resolve=False, so substitutions are resolved only now,
    # across the whole merged tree
    config = ConfigParser.resolve_substitutions(config)
    if debug:
        print("Loaded config:\n" + HOCONConverter.to_json(config))

    # if config contains a key "logging", use it to reconfigure python logging
    if "logging" in config:
        if debug:
            print("Reconfiguring logging from config")
        if config.get("capture_exceptions", True):
            sys.excepthook = log_uncaught_exception
        logging.captureWarnings(config.get("capture_warnings", True))
        logging.config.dictConfig(config["logging"].as_plain_ordered_dict())

    # check python version
    # iss4e lib is using some syntax features and functions which were only introduced in python 3.5
    rec_ver = tuple(config.get("min_py_version", [3, 5]))
    if sys.version_info < rec_ver:
        warnings.warn(
            "Using outdated python version {}, a version >= {} would be recommended for use with iss4e lib. "
            "Try using a newer python binary, e.g. by calling `python{}.{}` instead of the default `python`."
            .format(sys.version_info, rec_ver, rec_ver[0], rec_ver[1]))
    return config