Ejemplo n.º 1
0
def test_env_isolation(set_env):
    """A parser registered on one ``Env`` instance is not visible on another."""
    set_env({"FOO": "foo"})

    with_parser = environs.Env()

    @with_parser.parser_for("foo")
    def foo(value):
        # Identity parser: hands the raw value straight back.
        return value

    without_parser = environs.Env()

    # Only the instance that registered the "foo" parser exposes it.
    parsed = with_parser.foo("FOO")
    assert parsed == "foo"
    with pytest.raises(AttributeError):
        without_parser.foo("FOO")
Ejemplo n.º 2
0
def test_env_isolation(set_env):
    """Custom parsers belong to the ``Env`` instance they were added to."""
    set_env({'FOO': 'foo'})

    registering_env = environs.Env()

    @registering_env.parser_for('foo')
    def foo(value):
        # Pass-through parser; the returned value is the raw input.
        return value

    plain_env = environs.Env()

    # The parser is reachable through the registering instance ...
    assert registering_env.foo('FOO') == 'foo'
    # ... while an instance created afterwards has no such attribute.
    with pytest.raises(AttributeError):
        plain_env.foo('FOO')
Ejemplo n.º 3
0
def main() -> None:
    """Build a ``Config`` from environment variables and group the files.

    ``LOG_LEVEL`` falls back to ``'INFO'``; every other variable is read
    without a default. The resulting ``Config`` is handed to
    ``CalibratedLocationFileGrouper`` and ``group_files()`` is invoked.
    """
    env = environs.Env()

    # Paths and log level.
    calibrated_dir: Path = env.path('CALIBRATED_PATH')
    location_dir: Path = env.path('LOCATION_PATH')
    output_dir: Path = env.path('OUT_PATH')
    level: str = env.log_level('LOG_LEVEL', 'INFO')

    # Index settings, forwarded to Config unchanged.
    source_type_idx: int = env.int('SOURCE_TYPE_INDEX')
    year_idx: int = env.int('YEAR_INDEX')
    month_idx: int = env.int('MONTH_INDEX')
    day_idx: int = env.int('DAY_INDEX')
    source_id_idx: int = env.int('SOURCE_ID_INDEX')
    data_type_idx: int = env.int('DATA_TYPE_INDEX')

    log_config.configure(level)
    log.debug(
        f'calibrated_path: {calibrated_dir} location_path: {location_dir} out_path: {output_dir}'
    )

    settings = Config(
        calibrated_path=calibrated_dir,
        location_path=location_dir,
        out_path=output_dir,
        source_type_index=source_type_idx,
        year_index=year_idx,
        month_index=month_idx,
        day_index=day_idx,
        source_id_index=source_id_idx,
        data_type_index=data_type_idx,
    )
    CalibratedLocationFileGrouper(settings).group_files()
Ejemplo n.º 4
0
def pg_connection(request):
    """Apply migrations to ``DATABASE_URL`` and return a psycopg2 connection to it.

    The ``request`` argument is accepted but not used by this body.
    """
    import environs

    dsn = environs.Env()('DATABASE_URL')
    # Migrations run before the connection is opened.
    apply_migrations(dsn)
    return psycopg2.connect(dsn=dsn)
Ejemplo n.º 5
0
    def __init__(self, name, root_dir=os.path.dirname(__file__)):
        """Resolve application paths from the environment and load the config.

        A ``.env`` file is read through ``environs`` and every variable is
        looked up with the prefix ``"<name>_"``. Each path setting falls back
        to a location derived from ``ROOT_PATH`` when its variable is unset
        or empty.

        Args:
            name: Application name; also used to build the variable prefix.
            root_dir: Fallback for ``ROOT_PATH``. Defaults to the directory
                containing this module.
        """
        self.name = name
        self.console = None

        # Load environment variables from the .env file.
        app_env = environs.Env()
        app_env.read_env()

        # ``False`` is passed as the default so that an unset variable is
        # falsy and ``or`` selects the fallback value.
        with app_env.prefixed("{}_".format(name)):
            self.ROOT_PATH = app_env("ROOT_PATH", False) or root_dir
            self.ENV = app_env("ENV", False)
            self.CONFIG_PATH = app_env("CONFIG_PATH", False) or os.path.join(
                self.ROOT_PATH, "config/")
            self.OVERRIDE_CONFIG_PATH = app_env("OVERRIDE_CONFIG_PATH", False)

            self.CACHE_PATH = app_env("CACHE_PATH", False) or os.path.join(
                self.ROOT_PATH, "cache/")
            self.INPUT_DIR = app_env("INPUT_DIR", False) or os.path.join(
                self.ROOT_PATH)
            # NOTE: the attribute is OUTPUT_PATH but the variable is OUTPUT_DIR.
            self.OUTPUT_PATH = app_env("OUTPUT_DIR", False) or os.path.join(
                self.ROOT_PATH)

        # Load application config.
        config = AppEnv._load_config(self.CONFIG_PATH)

        # Used in testing: config files in test data override local config
        # files. The unset value is ``False`` (never ``None``), so test for
        # truthiness; otherwise ``os.path.isdir(False)`` would stat file
        # descriptor 0 instead of a path.
        if self.OVERRIDE_CONFIG_PATH and os.path.isdir(
                self.OVERRIDE_CONFIG_PATH):
            config = AppEnv._load_config(self.OVERRIDE_CONFIG_PATH, config)

        # Final configuration (loaded, possibly overridden).
        self.CONFIG = config
def main() -> None:
    """Read paths from the environment and indices from the CLI, then pad.

    ``DATA_PATH``, ``OUT_PATH`` and ``LOG_LEVEL`` come from environment
    variables. The five index options (``--yearindex``, ``--monthindex``,
    ``--dayindex``, ``--locindex``, ``--subdirindex``) come from the command
    line; argparse converts them to ``int`` and rejects a missing or
    non-integer value with a usage error instead of an unhandled
    ``TypeError``/``ValueError`` from ``int()``.
    """
    env = environs.Env()
    data_path: Path = env.path('DATA_PATH')
    out_path: Path = env.path('OUT_PATH')
    log_level: str = env.log_level('LOG_LEVEL')
    log_config.configure(log_level)
    log = get_logger()
    log.debug(f'data_dir: {data_path} out_dir: {out_path}')

    parser = argparse.ArgumentParser()
    for option in ('--yearindex', '--monthindex', '--dayindex',
                   '--locindex', '--subdirindex'):
        # Every index is needed to build Config, so none is optional.
        parser.add_argument(option, type=int, required=True)
    args = parser.parse_args()

    config = Config(data_path=data_path,
                    out_path=out_path,
                    year_index=args.yearindex,
                    month_index=args.monthindex,
                    day_index=args.dayindex,
                    location_index=args.locindex,
                    data_type_index=args.subdirindex,
                    relative_path_index=0,
                    window_size=0)
    variable_pad = VariablePad(config)
    variable_pad.pad()
Ejemplo n.º 7
0
def main():
    """Read settings from the environment and run ``gap_filler`` on the data.

    When ``get_date_constraints()`` returns a mapping, its ``start_date`` and
    ``end_date`` entries are forwarded as keyword arguments to both
    ``gap_filler`` calls; otherwise the calls are made without them.
    """
    env = environs.Env()
    data_path = env('DATA_PATH')
    location_path = env('LOCATION_PATH')
    empty_files_path = env('EMPTY_FILES_PATH')
    raw_output_directories = env('OUTPUT_DIRECTORIES')
    out_path = env('OUT_PATH')
    log_config.configure(env('LOG_LEVEL'))

    # Directory names to output should be a comma separated string.
    # NOTE(review): a value without a comma stays a plain string rather than
    # a one-element list - confirm the callee expects that.
    if ',' in raw_output_directories:
        output_directories = raw_output_directories.split(',')
    else:
        output_directories = raw_output_directories

    # Paths of the empty template files.
    empty_paths = get_empty_file_paths(empty_files_path)
    empty_data = empty_paths.get('empty_data_path')
    empty_flags = empty_paths.get('empty_flags_path')
    empty_uncertainty = empty_paths.get('empty_uncertainty_data_path')

    # Optional date window, passed through only when constraints exist.
    date_window = {}
    constraints = get_date_constraints()
    if constraints is not None:
        date_window = {
            'start_date': constraints.get('start_date'),
            'end_date': constraints.get('end_date'),
        }

    keys = gap_filler.get_data_files(data_path, out_path, **date_window)
    gap_filler.process_location_files(location_path, keys, out_path, output_directories,
                                      empty_data, empty_flags, empty_uncertainty,
                                      **date_window)
def main() -> None:
    """Build a ``Config`` from environment variables and call ``process_files``.

    ``DATA_PATH`` and ``CALIBRATION_PATH`` are read with a default of
    ``None``; ``LOG_LEVEL`` falls back to ``'INFO'``. The remaining variables
    are read without a default.
    """
    env = environs.Env()

    # Optional inputs (default None), then the output location.
    data_dir: Path = env.path('DATA_PATH', None)
    calibration_dir: Path = env.path('CALIBRATION_PATH', None)
    output_dir: Path = env.path('OUT_PATH')
    level: str = env.log_level('LOG_LEVEL', 'INFO')

    # Index settings, forwarded to Config unchanged.
    source_id_idx: int = env.int('DATA_SOURCE_ID_INDEX')
    source_type_idx: int = env.int('DATA_SOURCE_TYPE_INDEX')
    year_idx: int = env.int('DATA_YEAR_INDEX')
    month_idx: int = env.int('DATA_MONTH_INDEX')
    day_idx: int = env.int('DATA_DAY_INDEX')
    relative_idx: int = env.int('RELATIVE_PATH_INDEX')

    log_config.configure(level)
    log = structlog.get_logger()
    log.debug(f'data_path: {data_dir} calibration_path: {calibration_dir} out_path: {output_dir}')

    settings = Config(
        data_path=data_dir,
        calibration_path=calibration_dir,
        out_path=output_dir,
        data_source_type_index=source_type_idx,
        data_source_id_index=source_id_idx,
        data_year_index=year_idx,
        data_month_index=month_idx,
        data_day_index=day_idx,
        relative_path_index=relative_idx,
    )
    process_files(settings)
def main() -> None:
    """Build a ``Config`` from environment variables and run ``array_parser.parse``.

    ``LOG_LEVEL`` falls back to ``'INFO'``; every other variable is read
    without a default.
    """
    env = environs.Env()
    data_path: Path = env.path('DATA_PATH')
    schema_path: Path = env.path('SCHEMA_PATH')
    out_path: Path = env.path('OUT_PATH')
    parse_calibration = env.bool('PARSE_CALIBRATION')
    log_level: str = env.log_level('LOG_LEVEL', 'INFO')
    source_type_index: int = env.int('SOURCE_TYPE_INDEX')
    year_index: int = env.int('YEAR_INDEX')
    month_index: int = env.int('MONTH_INDEX')
    day_index: int = env.int('DAY_INDEX')
    source_id_index: int = env.int('SOURCE_ID_INDEX')
    data_type_index: int = env.int('DATA_TYPE_INDEX')
    test_mode: bool = env.bool("TEST_MODE")
    # Configure logging before the first message so the debug line below is
    # emitted under the requested LOG_LEVEL.
    log_config.configure(log_level)
    log.debug(
        f'data_path: {data_path} schema_path: {schema_path} out_path: {out_path}'
    )
    config = Config(data_path=data_path,
                    schema_path=schema_path,
                    out_path=out_path,
                    parse_calibration=parse_calibration,
                    source_type_index=source_type_index,
                    year_index=year_index,
                    month_index=month_index,
                    day_index=day_index,
                    source_id_index=source_id_index,
                    data_type_index=data_type_index,
                    test_mode=test_mode)
    array_parser.parse(config)
def main() -> None:
    """Add the location group name from the location file into the path.

    Settings come from environment variables (``LOG_LEVEL`` falls back to
    ``'INFO'``); they are packed into a ``Config`` that drives
    ``LocationGroupPath.add_groups_to_paths()``.
    """
    env = environs.Env()

    source_dir: Path = env.path('SOURCE_PATH')
    group_name: str = env.str('GROUP')
    output_dir: Path = env.path('OUT_PATH')
    level: str = env.log_level('LOG_LEVEL', 'INFO')

    # Index settings, forwarded to Config unchanged.
    source_type_idx: int = env.int('SOURCE_TYPE_INDEX')
    year_idx: int = env.int('YEAR_INDEX')
    month_idx: int = env.int('MONTH_INDEX')
    day_idx: int = env.int('DAY_INDEX')
    location_idx: int = env.int('LOCATION_INDEX')
    data_type_idx: int = env.int('DATA_TYPE_INDEX')

    log_config.configure(level)
    log.debug(
        f'source_path: {source_dir} group: {group_name} out_path: {output_dir}')

    settings = Config(
        source_path=source_dir,
        out_path=output_dir,
        group=group_name,
        source_type_index=source_type_idx,
        year_index=year_idx,
        month_index=month_idx,
        day_index=day_idx,
        location_index=location_idx,
        data_type_index=data_type_idx,
    )
    LocationGroupPath(settings).add_groups_to_paths()
Ejemplo n.º 11
0
    def __init__(self, name, root_dir=os.path.dirname(__file__)):
        """Resolve application paths from the environment and load the config.

        A ``.env`` file is read through ``environs`` and every variable is
        looked up with the prefix ``"<name>_"``. Each path setting falls back
        to a location derived from ``ROOT_PATH`` when its variable is unset
        or empty.

        Args:
            name: Application name; also used to build the variable prefix.
            root_dir: Fallback for ``ROOT_PATH``. Defaults to the directory
                containing this module.
        """
        self.name = name

        # Load environment variables from the .env file.
        app_env = environs.Env()
        app_env.read_env()

        # ``False`` is passed as the default so that an unset variable is
        # falsy and ``or`` selects the fallback value.
        with app_env.prefixed("{}_".format(name)):
            self.ROOT_PATH = app_env("ROOT_PATH", False) or root_dir
            self.ENV = app_env("ENV", False)
            self.CONFIG_PATH = app_env("CONFIG_PATH", False) or os.path.join(
                self.ROOT_PATH, "config")
            self.OVERRIDE_CONFIG_PATH = app_env("OVERRIDE_CONFIG_PATH", False)
            self.CACHE_PATH = app_env("CACHE_PATH", False) or os.path.join(
                self.ROOT_PATH, "cache")
            self.IMPORT_PATH = app_env("IMPORT_PATH", False) or os.path.join(
                self.ROOT_PATH)
            self.OUTPUT_PATH = app_env("OUTPUT_PATH", False) or os.path.join(
                self.ROOT_PATH, "export")
            # TODO(ccollett): Refactor this to output path
            self.EXPORT_PATH = app_env("EXPORT_PATH", False) or os.path.join(
                self.ROOT_PATH, "export")

        # Load application config.
        config = AppEnv._load_config(self.CONFIG_PATH)

        # Used in testing: config files in test data override local config
        # files. The unset value is ``False`` (never ``None``), so test for
        # truthiness; otherwise ``os.path.isdir(False)`` would stat file
        # descriptor 0 instead of a path.
        if self.OVERRIDE_CONFIG_PATH and os.path.isdir(
                self.OVERRIDE_CONFIG_PATH):
            config = AppEnv._load_config(self.OVERRIDE_CONFIG_PATH, config)
        self.CONFIG = config
def main() -> None:
    """Build a ``Config`` from environment variables and group the files.

    All variables are read without a default. The ``Config`` is handed to
    ``EventLocationGrouper`` and ``group_files()`` is invoked.
    """
    env = environs.Env()

    data_dir: Path = env.path('DATA_PATH')
    location_dir: Path = env.path('LOCATION_PATH')
    output_dir: Path = env.path('OUT_PATH')
    level: str = env.log_level('LOG_LEVEL')

    # Index settings, forwarded to Config unchanged.
    source_type_idx: int = env.int('SOURCE_TYPE_INDEX')
    year_idx: int = env.int('YEAR_INDEX')
    month_idx: int = env.int('MONTH_INDEX')
    day_idx: int = env.int('DAY_INDEX')
    source_id_idx: int = env.int('SOURCE_ID_INDEX')

    log_config.configure(level)
    log.debug(
        f'data_dir: {data_dir} location_dir: {location_dir} out_dir: {output_dir}'
    )

    settings = Config(
        data_path=data_dir,
        location_path=location_dir,
        out_path=output_dir,
        source_type_index=source_type_idx,
        year_index=year_idx,
        month_index=month_idx,
        day_index=day_idx,
        source_id_index=source_id_idx,
    )
    EventLocationGrouper(settings).group_files()
Ejemplo n.º 13
0
def main() -> None:
    """Group data by related location groups.

    All settings are read from environment variables without defaults and
    packed into a ``Config`` that drives
    ``RelatedLocationGrouper.group_files()``.
    """
    env = environs.Env()

    data_dir: Path = env.path('DATA_PATH')
    output_dir: Path = env.path('OUT_PATH')
    level: str = env.log_level('LOG_LEVEL')

    # Index settings, forwarded to Config unchanged.
    source_type_idx: int = env.int('SOURCE_TYPE_INDEX')
    year_idx: int = env.int('YEAR_INDEX')
    month_idx: int = env.int('MONTH_INDEX')
    day_idx: int = env.int('DAY_INDEX')
    group_idx: int = env.int('GROUP_INDEX')
    location_idx: int = env.int('LOCATION_INDEX')
    data_type_idx: int = env.int('DATA_TYPE_INDEX')

    log_config.configure(level)
    log.debug(f'data_path: {data_dir} out_path: {output_dir}')

    settings = Config(
        data_path=data_dir,
        out_path=output_dir,
        source_type_index=source_type_idx,
        year_index=year_idx,
        month_index=month_idx,
        day_index=day_idx,
        group_index=group_idx,
        location_index=location_idx,
        data_type_index=data_type_idx,
    )
    RelatedLocationGrouper(settings).group_files()
def main():
    """Read ``SOURCE_PATH``, ``OUT_PATH`` and ``LOG_LEVEL``, then call ``process``."""
    env = environs.Env()
    source_dir = env('SOURCE_PATH')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'source_path: {source_dir} out_path: {output_dir}')
    process(source_dir, output_dir)
def main():
    """Read ``PATHNAME``, ``OUT_PATH`` and ``LOG_LEVEL``, then call ``join``."""
    env = environs.Env()
    source_pattern = env('PATHNAME')
    output_dir = env('OUT_PATH')
    level = env('LOG_LEVEL')
    # Logging is configured before the first message is emitted.
    log_config.configure(level)
    log.debug(f'pathname: {source_pattern}, log_level: {level}')
    join(source_pattern, output_dir)
Ejemplo n.º 16
0
def main() -> None:
    """Read packaging settings from the environment and call ``package``.

    ``LOG_LEVEL`` falls back to ``'INFO'``; the three integer settings are
    read without a default.
    """
    env = environs.Env()
    level: str = env.log_level('LOG_LEVEL', 'INFO')
    prefix_idx: int = env.int('PREFIX_INDEX')
    prefix_len: int = env.int('PREFIX_LENGTH')
    sort_idx: int = env.int('SORT_INDEX')
    log_config.configure(level)
    package(
        prefix_index=prefix_idx,
        prefix_length=prefix_len,
        sort_index=sort_idx,
    )
def main():
    """Read ``OUT_PATH``, ``DATABASE_URL`` and ``LOG_LEVEL``, then call ``load``."""
    env = environs.Env()
    output_dir = env('OUT_PATH')
    database_url = env('DATABASE_URL')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'Out path: {output_dir}')
    load(database_url, output_dir)
def main():
    """Group related paths without modifying the paths.

    ``RELATED_PATHS``, ``OUT_PATH`` and ``LOG_LEVEL`` are read from the
    environment; the first two are passed to ``group``.
    """
    env = environs.Env()
    related = env('RELATED_PATHS')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'related_paths: {related} out_path: {output_dir}')
    group(related, output_dir)
Ejemplo n.º 19
0
def main():
    """Read link-merge settings from the environment and call ``linkmerge``.

    ``LOG_LEVEL`` falls back to ``'INFO'`` and ``DEDUP_THRESHOLD`` to ``0.3``.
    """
    env = environs.Env()
    source_dir = env.path('IN_PATH')
    output_dir = env.path('OUT_PATH')
    level = env.str('LOG_LEVEL', 'INFO')
    # Default dedup threshold is 30 percent duplication.
    threshold = env.float('DEDUP_THRESHOLD', 0.3)
    log_config.configure(level)
    linkmerge(source_dir, output_dir, threshold)
Ejemplo n.º 20
0
    def __init__(self, config_file):
        """Creates an object with properties for configuration of scraper

        Initializes properties for a configuration object that will be used
        in setting up the scraper and modifying run-time execution without
        use of a commandline interface or modification of source code.

        Settings are addressed by dotted names such as ``data.path``. A
        literal top-level key with that exact spelling is used when present;
        otherwise the name is followed through nested tables (``[data]`` then
        ``path``). A missing configuration file is treated as an empty
        configuration, so every setting takes its default.

        Args:
            config_file: path to a TOML configuration file

        Raises:
            environs.EnvValidationError: if the .env file is incorrectly or
                not setup, offer opportunity to quit execution and raise
                error.
        """
        try:
            config = tomlkit.loads(Path(config_file).read_text())
        except FileNotFoundError:
            config = {}

        def lookup(dotted_key, default=None):
            """Return the setting named ``dotted_key``, or ``default``."""
            # A plain ``config.get("data.path")`` only matches a top-level
            # key literally named "data.path"; keep that lookup first for
            # backward compatibility, then descend into nested tables.
            if dotted_key in config:
                return config[dotted_key]
            node = config
            try:
                for part in dotted_key.split("."):
                    node = node[part]
            except (KeyError, TypeError, IndexError):
                # Missing table/key, or a non-table value part-way down.
                return default
            return node

        self.data_path = Path(lookup("data.path", "./data")).resolve()

        self.link_file = Path(
            lookup("prev_links.link_file", "scraped_links.json")).resolve()

        self.login_url = lookup("login.url", "https://www.rep-am.com/login")
        self.login_headers = lookup("login.headers")
        self.session_headers = lookup("session.headers")
        self.base_url = lookup(
            "session.base_url",
            "https://www.rep-am.com/category/local/records/police/")

        self.connconfig = dict(lookup("database.config", {}))
        self.connconfig.update({"data_path": self.data_path})
        self.inserter_type = lookup("database.inserter", "sqlite")
        self.cleaner_type = lookup("cleaning.cleaner", "basic")

        env_path = Path(lookup("env.path", Path("."))).resolve()

        # Login form fields are read from the .env file in ``env_path``.
        env = environs.Env()
        env.read_env(env_path / Path(".env"))
        try:
            self.log_info = {
                "log": env.str("log"),
                "pwd": env.str("pwd"),
                "submit": env.str("submit"),
                "redirect_to": env.str("redirect_to"),
                "testcookie": env.str("testcookie"),
            }
        except environs.EnvValidationError as env_error:
            # Interactive escape hatch: any answer not starting with "n"
            # (including an empty one) re-raises the error.
            print("Login information not found or incomplete")
            quit_prompt = input("Would you like to quit? (y/n) [y]")
            if not quit_prompt.lower().startswith("n"):
                raise env_error
            self.log_info = {}
def main():
    """Read ``DATA_PATH``, ``LOCATION_PATH``, ``OUT_PATH`` and ``LOG_LEVEL``, then call ``group``."""
    env = environs.Env()
    data_dir = env('DATA_PATH')
    location_dir = env('LOCATION_PATH')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'data_dir: {data_dir} location_dir: {location_dir} out_dir: {output_dir}')
    group(data_dir, location_dir, output_dir)
def main():
    """Group input data files without modifying the file paths.

    ``DATA_PATH``, ``OUT_PATH`` and ``LOG_LEVEL`` are read from the
    environment; the two paths are passed to ``group``.
    """
    env = environs.Env()
    data_dir = env('DATA_PATH')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'data_path: {data_dir} out_path: {output_dir}')
    group(data_dir, output_dir)
Ejemplo n.º 23
0
def main():
    """Group data by related location group.

    ``DATA_PATH``, ``OUT_PATH`` and ``LOG_LEVEL`` are read from the
    environment; the two paths are passed to ``group_related``.
    """
    env = environs.Env()
    data_dir = env('DATA_PATH')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'data_path: {data_dir} out_path: {output_dir}')
    group_related(data_dir, output_dir)
Ejemplo n.º 24
0
def main() -> None:
    """Read path-ordering settings from the environment and call ``order_paths``.

    ``PATH_INDICES`` is read with ``env.list`` and passed on as returned.
    """
    env = environs.Env()
    source_dir: Path = env.path('IN_PATH')
    output_dir: Path = env.path('OUT_PATH')
    level: str = env.log_level('LOG_LEVEL')
    path_indices: list = env.list('PATH_INDICES')
    log_config.configure(level)
    # The logger is obtained only after logging has been configured.
    logger = structlog.get_logger()
    logger.debug(f'in_path: {source_dir} out_path: {output_dir}')
    order_paths(source_dir, output_dir, path_indices)
Ejemplo n.º 25
0
def load_config() -> Config:
    """Read a ``.env`` file plus the environment and return a ``Config``.

    ``POSTGRES_URI`` is read without a default. ``JWT_SECRET`` and
    ``ROOT_TOKEN`` default to the empty string and ``MODE`` to ``"full"``.
    """
    env = environs.Env()
    env.read_env()

    # Values are passed to Config positionally, in this order.
    postgres_uri = env.str("POSTGRES_URI")
    jwt_secret = env.str("JWT_SECRET", "")
    token = env.str("ROOT_TOKEN", "")
    run_mode = env.str("MODE", "full")
    return Config(postgres_uri, jwt_secret, token, run_mode)
Ejemplo n.º 26
0
def admin(api):
    """Invoke ``_console("create-admin")`` and return the admin credentials.

    ``ADMIN_USERNAME`` defaults to ``"admin"``; ``ADMIN_PASSWORD`` is read
    without a default. The ``api`` argument is not used by this body.
    """
    _console("create-admin")

    env = environs.Env()
    env.read_env()

    username = env.str("ADMIN_USERNAME", "admin")
    password = env.str("ADMIN_PASSWORD")
    return {"username": username, "password": password}
Ejemplo n.º 27
0
def main() -> None:
    """Read settings from the environment and call ``l0tol0p``.

    ``LOG_LEVEL`` falls back to ``'INFO'``; the other variables are read
    without a default.
    """
    env = environs.Env()
    # IN_PATH is read with ``env.str``, so the value is a plain string (the
    # previous ``Path`` annotation did not match what is returned).
    in_path: str = env.str('IN_PATH')
    out_path: Path = env.path('OUT_PATH')
    file_dirs: list = env.list('FILE_DIR')
    log_level: str = env.log_level('LOG_LEVEL', 'INFO')
    relative_path_index: int = env.int('RELATIVE_PATH_INDEX')
    log_config.configure(log_level)

    l0tol0p(in_path, out_path, file_dirs, relative_path_index)
Ejemplo n.º 28
0
def main() -> None:
    """Read settings from the environment and call ``link_location_files``.

    ``LOG_LEVEL`` falls back to ``'INFO'``; the other variables are read
    without a default.
    """
    env = environs.Env()
    location_dir: Path = env.path('LOCATION_PATH')
    output_dir: Path = env.path('OUT_PATH')
    schema_idx: int = env.int('SCHEMA_INDEX')
    level: str = env.log_level('LOG_LEVEL', 'INFO')
    log_config.configure(level)
    link_location_files(
        location_path=location_dir,
        out_path=output_dir,
        schema_index=schema_idx,
    )
Ejemplo n.º 29
0
def main():
    """Read ``CALIBRATED_PATH``, ``LOCATION_PATH``, ``OUT_PATH`` and ``LOG_LEVEL``, then call ``group``."""
    env = environs.Env()
    calibrated_dir = env('CALIBRATED_PATH')
    location_dir = env('LOCATION_PATH')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'calibrated_dir: {calibrated_dir} location_dir: {location_dir} out_dir: {output_dir}')
    group(calibrated_dir, location_dir, output_dir)
def main():
    """Add the related location group name stored in the location file to the output path.

    ``SOURCE_PATH``, ``GROUP``, ``OUT_PATH`` and ``LOG_LEVEL`` are read from
    the environment; the first three are passed to ``process``.
    """
    env = environs.Env()
    source_dir = env('SOURCE_PATH')
    group_name = env('GROUP')
    output_dir = env('OUT_PATH')
    # Logging is configured before the first message is emitted.
    log_config.configure(env('LOG_LEVEL'))
    log.debug(f'source_path: {source_dir} group: {group_name} out_path: {output_dir}')
    process(source_dir, group_name, output_dir)