Beispiel #1
0
def init_session(config,
                 app=None,
                 return_context=False,
                 overrides=None,
                 use_session=False):
    """Build a HOCON configuration and use it to start Spark.

    :param config: one of
        - a ``str`` naming an existing path (parsed as a file),
        - any other ``str`` (parsed as HOCON text),
        - a ``dict`` (converted with ``ConfigFactory.from_dict``),
        - anything else, which is used as the configuration unchanged.
    :param app: forwarded as the second argument of ``init_spark``.
    :param return_context: when ``use_session`` is false, return the tuple
        ``(init_spark result, SQL context)`` instead of the SQL context only.
    :param overrides: optional HOCON text layered on top of ``config``;
        ``config`` acts as the fallback for keys the overrides do not set.
    :param use_session: forwarded to ``init_spark``; when true, the value
        returned by ``init_spark`` is returned as is.
    :return: see ``return_context`` and ``use_session``.
    """
    import os
    from pyhocon import ConfigFactory, ConfigParser

    # Normalise whatever the caller handed us into a base configuration.
    if isinstance(config, str):
        parse = (ConfigFactory.parse_file
                 if os.path.exists(config) else ConfigFactory.parse_string)
        base_conf = parse(config, resolve=False)
    elif isinstance(config, dict):
        base_conf = ConfigFactory.from_dict(config)
    else:
        base_conf = config

    if overrides is None:
        # No overrides: resolve substitutions on the base config directly.
        conf = base_conf
        ConfigParser.resolve_substitutions(conf)
    else:
        conf = ConfigFactory.parse_string(overrides).with_fallback(base_conf)

    spark = init_spark(conf, app, use_session)
    if use_session:
        return spark

    # A master URL starting with "yarn" selects the Hive-backed context.
    if conf['spark-prop.spark.master'].startswith('yarn'):
        from pyspark.sql import HiveContext
        sqc = HiveContext(spark)

        if 'hive-prop' in conf:
            for key, value in prop_list(conf['hive-prop']).items():
                sqc.setConf(key, str(value))
    else:
        from pyspark.sql import SQLContext
        sqc = SQLContext(spark)

    return (spark, sqc) if return_context else sqc
Beispiel #2
0
 def test_self_merge_ref_substitutions_object(self):
     """Merge two unresolved configs that both reference ``a``, then check ``c.d``."""
     base_hocon = """
         a : { }
         b : 1
         c : ${a} { d : [ ${b} ] }
         """
     extra_hocon = """
         e : ${a} {
         }
         """
     # Parse without resolving so the substitutions survive until after the merge.
     base_tree = ConfigFactory.parse_string(base_hocon, resolve=False)
     extra_tree = ConfigFactory.parse_string(extra_hocon, resolve=False)

     combined = ConfigTree.merge_configs(base_tree, extra_tree)
     final = ConfigParser.resolve_substitutions(combined)

     assert final.get("c.d") == [1]
Beispiel #3
0
 def test_self_merge_ref_substitutions_object3(self):
     """Check that ``${b1}`` inside a list sees the value of ``b1`` after the merge."""
     first_hocon = """
         b1 : { v1: 1 }
         b = [${b1}]
         """
     second_hocon = """
         b1 : { v1: 2, v2: 3 }
         """
     first = ConfigFactory.parse_string(first_hocon, resolve=False)
     second = ConfigFactory.parse_string(second_hocon, resolve=False)

     # Resolution happens only after both trees have been merged.
     result = ConfigParser.resolve_substitutions(
         ConfigTree.merge_configs(first, second))

     assert result.get("b1") == {"v1": 2, "v2": 3}
     items = result.get("b")
     assert len(items) == 1
     assert items[0] == {"v1": 2, "v2": 3}
Beispiel #4
0
 def test_self_merge_ref_substitutions_object2(self):
     """Check ``+=`` on a list across a merge, with an element built from ``${x}``."""
     left_hocon = """
         x : { v1: 1 }
         b1 : {v2: 2 }
         b = [${b1}]
         """
     right_hocon = """
         b2 : ${x} {v2: 3}
         b += [${b2}]
         """
     left = ConfigFactory.parse_string(left_hocon, resolve=False)
     right = ConfigFactory.parse_string(right_hocon, resolve=False)

     combined = ConfigTree.merge_configs(left, right)
     final = ConfigParser.resolve_substitutions(combined)

     entries = final.get("b")
     assert len(entries) == 2
     assert entries[0] == {'v2': 2}
     assert entries[1] == {'v1': 1, 'v2': 3}
Beispiel #5
0
 def test_self_merge_ref_substitutions_object(self):
     """Resolve substitutions after merging two unresolved trees; expect ``c.d == [1]``."""
     # Both snippets are parsed with resolve=False so that ${a} and ${b}
     # are only substituted once the trees have been combined.
     primary = ConfigFactory.parse_string("""
         a : { }
         b : 1
         c : ${a} { d : [ ${b} ] }
         """, resolve=False)
     secondary = ConfigFactory.parse_string("""
         e : ${a} {
         }
         """, resolve=False)

     outcome = ConfigParser.resolve_substitutions(
         ConfigTree.merge_configs(primary, secondary)
     )
     assert outcome.get("c.d") == [1]
Beispiel #6
0
 def test_self_merge_ref_substitutions_object3(self):
     """Merge a tree that redefines ``b1`` and verify both ``b1`` and ``b[0]``."""
     original = ConfigFactory.parse_string("""
         b1 : { v1: 1 }
         b = [${b1}]
         """, resolve=False)
     replacement = ConfigFactory.parse_string("""
         b1 : { v1: 2, v2: 3 }
         """, resolve=False)

     combined = ConfigTree.merge_configs(original, replacement)
     final = ConfigParser.resolve_substitutions(combined)

     assert final.get("b1") == {"v1": 2, "v2": 3}
     # The list element is produced by the ${b1} substitution.
     elements = final.get("b")
     assert len(elements) == 1
     assert elements[0] == {"v1": 2, "v2": 3}
Beispiel #7
0
 def test_self_merge_ref_substitutions_object2(self):
     """Append to list ``b`` from a second tree and verify both resolved elements."""
     initial = ConfigFactory.parse_string("""
         x : { v1: 1 }
         b1 : {v2: 2 }
         b = [${b1}]
         """, resolve=False)
     addition = ConfigFactory.parse_string("""
         b2 : ${x} {v2: 3}
         b += [${b2}]
         """, resolve=False)

     # Merge first, resolve afterwards, so cross-tree references can be satisfied.
     outcome = ConfigParser.resolve_substitutions(
         ConfigTree.merge_configs(initial, addition)
     )

     values = outcome.get("b")
     assert len(values) == 2
     assert values[0] == {'v2': 2}
     assert values[1] == {'v1': 1, 'v2': 3}
Beispiel #8
0
def load_config(cwd=None, debug=False):
    """
    Tries to find HOCON files named "iss4e.conf" using the paths returned by find_files().
    The found files are then parsed and merged together, so that a single configuration tree is returned.
    For details on HOCON syntax, see https://github.com/chimpler/pyhocon and https://github.com/typesafehub/config/

    :param cwd: directory handed to find_files() as the starting point. Defaults to the current
        working directory *at call time* (None means "use os.getcwd() now").
    :param debug: if true, print the candidate files, the merged configuration and a note when
        logging is reconfigured.
    :return: the merged and resolved configuration. Besides the file contents it contains the keys
        "__main__" (base name of the main module's file, or "__cli__") and "__cwd__" (absolute cwd).

    Side effects when the configuration has a "logging" key: python logging is reconfigured from it,
    warnings capturing is set from "capture_warnings" (default True) and, unless "capture_exceptions"
    is false, sys.excepthook is replaced by log_uncaught_exception.
    A warning is issued if the interpreter is older than "min_py_version" (default [3, 5]).

    Example configuration:
    - default config in home dir (~/iss4e.conf):
        datasources {
            influx {
                host = ${HOSTNAME}
                # also set your passwords (e.g. from env with ${MYSQL_PASSWD}) here
            }
            mysql {
                host = localhost
            }
        }

    - local config in cwd (./iss4e.conf):
        webike {
            # use the generic information from ${datasources.influx} (should be defined in ~/iss4e.conf and contain
            # host, password, ...) and extend it to use the (non-generic) database "webike"
            influx = ${datasources.influx} {
                db = "webike"
            }
        }

    - merged config that will be returned:
        {
            "datasources": {
                "influx": {
                    "host": "SD959-LT"
                },
                "mysql": {
                    "host": "localhost"
                }
            },
            "webike": {
                "influx": {
                    "host": "SD959-LT", # copied from ~/iss4e.conf: datasources.influx
                    "db": "webike"
                }
            }
        }
    """
    # A default of os.getcwd() in the signature is evaluated only once, when the function is
    # defined, and would ignore any later chdir. Resolve it on every call instead.
    if cwd is None:
        cwd = os.getcwd()

    # find "iss4e.conf" file in current working dir or parent directories
    # (materialised because the result is iterated twice below; this also keeps the debug
    # listing correct should find_files() return a one-shot iterator)
    files = list(find_files("iss4e.conf", cwd))
    configs = [
        ConfigFactory.parse_file(path, required=False, resolve=False)
        for path in files if os.path.isfile(path)
    ]
    if debug:
        print("Config files:\n" +
              "\n".join(path + " [" +
                        ("not " if not os.path.isfile(path) else "") + "found]"
                        for path in files))
    # merge all levels of config
    config = ConfigTree(root=True)
    config.put(
        "__main__",
        os.path.basename(
            getattr(sys.modules['__main__'], "__file__", "__cli__")))
    config.put("__cwd__", os.path.abspath(cwd))
    for c in configs:
        # each file acts as the base; the configuration accumulated so far is merged into it
        config = ConfigTree.merge_configs(c, config)
    # files were parsed with resolve=False, so substitutions are resolved once, after merging
    config = ConfigParser.resolve_substitutions(config)
    if debug:
        print("Loaded config:\n" + HOCONConverter.to_json(config))

    # if config contains a key "logging", use it to reconfigure python logging
    if "logging" in config:
        if debug:
            print("Reconfiguring logging from config")
        if config.get("capture_exceptions", True):
            sys.excepthook = log_uncaught_exception
        logging.captureWarnings(config.get("capture_warnings", True))
        logging.config.dictConfig(config["logging"].as_plain_ordered_dict())

    # check python version
    # iss4e lib is using some syntax features and functions which were only introduced in python 3.5
    rec_ver = tuple(config.get("min_py_version", [3, 5]))
    if sys.version_info < rec_ver:
        warnings.warn(
            "Using outdated python version {}, a version >= {} would be recommended for use with iss4e lib. "
            "Try using a newer python binary, e.g. by calling `python{}.{}` instead of the default `python`."
            .format(sys.version_info, rec_ver, rec_ver[0], rec_ver[1]))

    return config