Example #1
0
def loader_func(**kwargs):
    """Load a CSV source through ``pd.read_csv``.

    ``path`` is removed from ``kwargs`` and resolved with ``handle_path``.
    Of the remaining options, only those whose names are listed by
    ``loader_prop_keys(LOADER_PROPS)`` are forwarded to ``pd.read_csv``;
    the rest are dropped.

    :param kwargs: loader options; must contain ``path``.
    :return: the result of ``pd.read_csv`` for the resolved path.
    :raises KeyError: if ``path`` is not present in ``kwargs``.
    """
    # handle_path is handed the kwargs dict itself, so it runs before the
    # options are filtered (presumably it may consume entries - TODO confirm).
    path = handle_path(kwargs.pop("path"), kwargs)
    # Look the allowed option names up once instead of once per kwarg.
    allowed = loader_prop_keys(LOADER_PROPS)
    read_opts = {k: v for k, v in kwargs.items() if k in allowed}
    return pd.read_csv(path, **read_opts)
Example #2
0
def load_file(sheet_name=None, **kwargs):
    """Load an Excel workbook through ``pd.read_excel``.

    ``path`` is removed from ``kwargs``, used to pick the reader engine and
    then resolved with ``handle_path``. Of the remaining options, only those
    whose names are listed by ``loader_prop_keys(LOADER_PROPS)`` are
    forwarded to ``pd.read_excel``.

    :param sheet_name: passed straight to ``pd.read_excel``. Per the pandas
        documentation, ``None`` loads every sheet into a dict keyed by sheet
        name, while a single name/index yields one DataFrame.
    :param kwargs: loader options; must contain ``path``.
    :return: the (non-empty) result of ``pd.read_excel``.
    :raises KeyError: if ``path`` is not present in ``kwargs``.
    :raises Exception: if ``pd.read_excel`` returns ``None`` or something of
        length zero.
    """
    path = kwargs.pop("path")
    # Paths ending in "xls" go to xlrd, everything else to openpyxl. The
    # comparison is case-insensitive so "REPORT.XLS" also selects xlrd.
    engine = "xlrd" if path.lower().endswith("xls") else "openpyxl"
    # handle_path is handed the kwargs dict itself, so it runs before the
    # options are filtered (presumably it may consume entries - TODO confirm).
    path = handle_path(path, kwargs)
    # Look the allowed option names up once instead of once per kwarg.
    allowed = loader_prop_keys(LOADER_PROPS)
    read_opts = {k: v for k, v in kwargs.items() if k in allowed}
    dfs = pd.read_excel(path, sheet_name=sheet_name, engine=engine, **read_opts)
    if dfs is None or not len(dfs):
        raise Exception("Failed to load Excel file. Returned no data.")
    return dfs
Example #3
0
def loader_func(**kwargs):
    """Load JSON data through ``pd.read_json`` or pandas' ``json_normalize``.

    ``normalize`` (default ``False``) and ``path`` are removed from
    ``kwargs``. The path is resolved with ``handle_path``, which is given a
    response handler returning ``resp.json()`` when normalizing and
    ``resp.text`` otherwise.

    * ``normalize`` truthy: the resolved value is passed to
      ``json_normalize`` together with ALL remaining ``kwargs``.
    * otherwise: the resolved value is passed to ``pd.read_json`` with only
      the options whose names are listed by
      ``loader_prop_keys(LOADER_PROPS)``.

    :param kwargs: loader options; must contain ``path``.
    :return: the result of the selected pandas function.
    :raises KeyError: if ``path`` is not present in ``kwargs``.
    """
    normalize = kwargs.pop("normalize", False)

    def resp_handler(resp):
        # Parsed JSON is needed for json_normalize; read_json takes raw text.
        return resp.json() if normalize else resp.text

    path = handle_path(kwargs.pop("path"), kwargs, resp_handler=resp_handler)
    if normalize:
        # json_normalize lives in a different place depending on is_pandas1().
        normalize_func = (pd.json_normalize
                          if is_pandas1() else pd.io.json.json_normalize)
        # NOTE(review): kwargs are forwarded unfiltered here, unlike the
        # read_json branch below - confirm this is intentional.
        return normalize_func(path, **kwargs)
    # Look the allowed option names up once instead of once per kwarg.
    allowed = loader_prop_keys(LOADER_PROPS)
    read_opts = {k: v for k, v in kwargs.items() if k in allowed}
    return pd.read_json(path, **read_opts)
Example #4
0
def loader_func(**kwargs):
    """Load a single sheet of an Excel workbook as a DataFrame.

    ``path`` and the optional ``sheet`` are removed from ``kwargs``. The path
    picks the reader engine and is then resolved with ``handle_path``. Of the
    remaining options, only those whose names are listed by
    ``loader_prop_keys(LOADER_PROPS)`` are forwarded to ``pd.read_excel``.

    :param kwargs: loader options; must contain ``path`` and may contain
        ``sheet`` (the sheet to load).
    :return: the requested sheet, or the first sheet of the workbook when no
        ``sheet`` was given.
    :raises KeyError: if ``path`` is not present in ``kwargs``.
    :raises Exception: if nothing was loaded (``None`` or zero length), or if
        a dict of sheets was returned that lacks the requested name.
    """
    path = kwargs.pop("path")
    # Paths ending in "xls" go to xlrd, everything else to openpyxl. The
    # comparison is case-insensitive so "REPORT.XLS" also selects xlrd.
    engine = "xlrd" if path.lower().endswith("xls") else "openpyxl"
    sheet_name = kwargs.pop("sheet", None)
    # handle_path is handed the kwargs dict itself, so it runs before the
    # options are filtered (presumably it may consume entries - TODO confirm).
    path = handle_path(path, kwargs)
    # Look the allowed option names up once instead of once per kwarg.
    allowed = loader_prop_keys(LOADER_PROPS)
    read_opts = {k: v for k, v in kwargs.items() if k in allowed}
    dfs = pd.read_excel(path, sheet_name=sheet_name, engine=engine, **read_opts)
    if dfs is None or not len(dfs):
        raise Exception("Failed to load Excel file. Returned no data.")
    if not isinstance(dfs, dict):
        # pandas returns a dict only for sheet_name=None (or a list); for one
        # sheet name it returns that sheet's DataFrame directly. Indexing it
        # by sheet name would look up a column, so return it as-is.
        return dfs
    if sheet_name:
        if sheet_name not in dfs:
            raise Exception(
                "Excel file loaded but there was no sheet named '{}'.".format(
                    sheet_name
                )
            )
        return dfs[sheet_name]
    # this is required because there is no support for loading multiple
    # datasets at once from the CLI, so only the first sheet is returned
    return next(iter(dfs.values()))