예제 #1
0
def import_data(data_retreive_method, paths):
    """Import report data from the local file system or a remote source.

    When ``data_retreive_method`` is ``"Local file system"`` every entry of
    ``paths`` is read with ``md.collect_md``. Any other value fetches each
    path through ``gh.get_request`` (with the settings returned by
    ``gh.auth_config``) and cleans the response with ``ju.clean_report``.
    Errors are displayed in the Streamlit sidebar and followed by a call to
    ``readme()``; records collected before the error are still processed.

    Args:
        data_retreive_method: Name of the selected data source.
        paths: Iterable of locations to load, one record set per entry.

    Returns:
        A ``(tidy_df, raw_df)`` tuple, where ``raw_df`` is the concatenation
        of every retrieved record set with missing values replaced by ``""``
        and ``tidy_df`` is ``df_preprocess(raw_df)``. ``None`` when nothing
        was retrieved.
    """
    json_lst = []
    if data_retreive_method == "Local file system":
        try:
            # Explicit loop (not a comprehension) so that records read before
            # a failure are kept in json_lst.
            for path in paths:
                json_lst.append(md.collect_md(path))
        except FileNotFoundError as err:
            st.sidebar.text(err)
            readme()
    else:
        passbuild = st.sidebar.checkbox("Only retreive build success records",
                                        value=True)
        try:
            configs = gh.auth_config()
            for path in paths:
                response = gh.get_request(path, passbuild, **configs)
                json_lst.append(ju.clean_report(response))
        # UI boundary: any failure is reported to the user instead of
        # crashing the app. EnvironmentError is already an Exception
        # subclass, so listing it separately was redundant.
        except Exception as err:
            st.sidebar.error(err)
            readme()

    # Nothing was retrieved: there is no frame to build.
    if not json_lst:
        return None

    # Concatenate once instead of growing a frame inside a loop, which
    # re-copies all accumulated rows on every iteration.
    frames = [pd.DataFrame(item) for item in json_lst]
    raw_df = pd.concat(frames).fillna("")
    tidy_df = df_preprocess(raw_df)
    return tidy_df, raw_df
예제 #2
0
def path_import(paths):
    """Collect the result of ``md.collect_md`` for every entry in ``paths``.

    Returns the list of collected results. If a ``FileNotFoundError`` is
    raised while reading, the error is shown in the Streamlit sidebar and
    ``None`` is returned.
    """
    try:
        collected = [md.collect_md(location) for location in paths]
    except FileNotFoundError as missing:
        st.sidebar.error(missing)
        return None
    return collected
예제 #3
0
def test_collect_md_with_two_inputs(tmp_path):
    """Check ``md.collect_md`` on a directory holding two identical files.

    Two markdown files with the same title and two headed sections are
    written to a temporary sub-directory; the collected result is expected
    to map each lower-cased heading to a two-entry list (one per file).
    """
    paragraph = "Some solutions that can be developed to \
avoid harm or fix the harm are conducting more research and not offering it \
to a selective group of people. More research needs to be done especially in \
terms of embryos. In addition, if germline editing is only offered to a \
select group of people, the wealthy, it will be problematic for the class \
system."
    # Both files carry exactly the same content.
    document = (
        f"# Reflection by\n\n## header1\n{paragraph}\n## header2\n{paragraph}"
    )

    directory = tmp_path / "sub"
    directory.mkdir()
    for file_name in ("hello.md", "world.md"):
        (directory / file_name).write_text(document)

    output = md.collect_md(directory, is_clean=False)

    # Expected section text is the paragraph followed by a single space.
    section = paragraph + " "
    expected = {
        "reflection by": ["", ""],
        "header1": [section, section],
        "header2": [section, section],
    }
    assert expected == output