Ejemplo n.º 1
0
def test_csv(test_data, tmp_path):
    """
    Round-trip every test map through a csv file and check the result.

    For each dataset in ``test_data`` a map with phenotypes and uncertainties
    is written to ``tmp.csv`` inside ``tmp_path`` and read back, both with and
    without an explicit wildtype. The same round trip is then repeated for a
    map built with custom site labels, and finally the file is read with the
    dataset's ``"mutant"`` entry as wildtype, which must emit a UserWarning.
    """

    for entry in test_data:

        wildtype = entry["wildtype"]

        # Every write in this test goes to the same file
        csv_path = os.path.join(tmp_path, "tmp.csv")

        original = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=wildtype,
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Dump the map to disk and confirm the file was created
        original.to_csv(csv_path, index=False)
        assert os.path.exists(csv_path)

        # Reading with the wildtype given explicitly must reproduce the map
        restored = gpmap.read_csv(csv_path, wildtype=wildtype)
        conftest.compare_gpmap(original, restored)

        # Reading with no wildtype must reproduce the map too, since the
        # wildtype is inferred
        restored = gpmap.read_csv(csv_path)
        conftest.compare_gpmap(original, restored)

        # Round trip a map carrying custom site labels ("10", "11", ...)
        custom_labels = [str(pos) for pos in range(10, 10 + len(wildtype))]
        labelled = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=wildtype,
                                        site_labels=custom_labels)
        labelled.to_csv(csv_path)
        restored = gpmap.read_csv(csv_path)

        # Only compare labels at sites with more than one listed mutation;
        # invariant sites get virtual site_labels and are skipped
        checked_sites = (site for site in range(len(restored.site_labels))
                         if len(entry["mutations"][site]) != 1)
        for site in checked_sites:
            assert restored.site_labels[site] == labelled.site_labels[site]

        # A bad wildtype should trigger a warning, after which the site
        # labels fall back to sequential integers
        with pytest.warns(UserWarning):
            restored = gpmap.read_csv(csv_path, wildtype=entry["mutant"])

        sequential = range(len(wildtype))
        assert np.array_equal(restored.site_labels, sequential)
Ejemplo n.º 2
0
def test_excel(test_data, tmp_path):
    """
    Round-trip every test map through an excel file and check the result.

    For each dataset in ``test_data`` a map with phenotypes and uncertainties
    is written to ``tmp.xlsx`` inside ``tmp_path`` and read back, both with
    and without an explicit wildtype. The same round trip is then repeated
    for a map built with custom site labels, and finally the file is read
    with the dataset's ``"mutant"`` entry as wildtype, which must emit a
    UserWarning.
    """

    for entry in test_data:

        wildtype = entry["wildtype"]

        # Every write in this test goes to the same file
        xlsx_path = os.path.join(tmp_path, "tmp.xlsx")

        original = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=wildtype,
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Dump the map to disk and confirm a regular file was created
        original.to_excel(filename=xlsx_path)
        assert os.path.isfile(xlsx_path)

        # Reading with the wildtype given explicitly must reproduce the map
        restored = gpmap.read_excel(filename=xlsx_path, wildtype=wildtype)
        conftest.compare_gpmap(original, restored)

        # Reading with no wildtype must reproduce the map too, since the
        # wildtype is inferred
        restored = gpmap.read_excel(filename=xlsx_path)
        conftest.compare_gpmap(original, restored)

        # Round trip a map carrying custom site labels ("10", "11", ...)
        custom_labels = [str(pos) for pos in range(10, 10 + len(wildtype))]
        labelled = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=wildtype,
                                        site_labels=custom_labels)
        labelled.to_excel(xlsx_path)
        restored = gpmap.read_excel(xlsx_path)

        # Only compare labels at sites with more than one listed mutation;
        # invariant sites get virtual site_labels and are skipped
        checked_sites = (site for site in range(len(restored.site_labels))
                         if len(entry["mutations"][site]) != 1)
        for site in checked_sites:
            assert restored.site_labels[site] == labelled.site_labels[site]

        # A bad wildtype should trigger a warning, after which the site
        # labels fall back to sequential integers
        with pytest.warns(UserWarning):
            restored = gpmap.read_excel(xlsx_path, wildtype=entry["mutant"])

        sequential = range(len(wildtype))
        assert np.array_equal(restored.site_labels, sequential)
Ejemplo n.º 3
0
def test_json(test_data, tmp_path):
    """
    Round-trip every test map through a json file and check the result.

    Each dataset in ``test_data`` is written to ``tmp.json`` inside
    ``tmp_path``, read back, and compared against the map that was written.
    """

    for entry in test_data:

        original = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                        genotype=entry["genotype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Dump the map to disk and confirm a regular file was created
        json_path = os.path.join(tmp_path, "tmp.json")
        original.to_json(filename=json_path)
        assert os.path.isfile(json_path)

        # Load it again and compare against what was written
        restored = gpmap.read_json(filename=json_path)
        conftest.compare_gpmap(original, restored)
Ejemplo n.º 4
0
def test_dict(test_data):
    """
    Test conversion of a gpmap to and from a dict.

    First exercises ``gpmap.read_dict`` on hand-written dictionaries (one
    valid, two expected to raise ValueError), then converts each dataset in
    ``test_data`` to a dict, inspects its metadata, and reads it back.
    """

    # Smallest possible input: one site, one genotype
    gpmap.read_dict({"wildtype": "0", "data": {"genotype": ["0"]}})

    # No wildtype entry at all must be rejected
    with pytest.raises(ValueError):
        gpmap.read_dict({"data": {"genotype": ["0"]}})

    # A wildtype whose length differs from the genotype length must be
    # rejected
    with pytest.raises(ValueError):
        gpmap.read_dict({"wildtype": "01", "data": {"genotype": ["0"]}})

    for entry in test_data:

        original = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                        genotype=entry["genotype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Convert the map to a dictionary
        as_dict = original.to_dict()

        # Wildtype and per-site mutations metadata must match the inputs
        assert as_dict["wildtype"] == entry["wildtype"]
        dumped_mutations = as_dict["mutations"]
        for site in range(len(dumped_mutations)):
            assert np.array_equal(dumped_mutations[site],
                                  entry["mutations"][site])

        # The data entry comes from a pandas conversion, so only its type is
        # checked here rather than its contents
        assert type(as_dict["data"]) is dict

        # Rebuilding from the dictionary must give back the same map
        restored = gpmap.read_dict(as_dict)
        conftest.compare_gpmap(original, restored)
Ejemplo n.º 5
0
def test_dataframe(test_data, tmp_path):
    """
    Test building a gpmap from a pandas dataframe.

    For each dataset in ``test_data`` this checks a dataframe with genotype,
    phenotype and uncertainty columns, a genotype-only dataframe (read with
    and without a wildtype), and a dataframe lacking the genotype column,
    which must raise ValueError.
    """

    for entry in test_data:

        wildtype = entry["wildtype"]

        # Full map: genotype, phenotype and uncertainty
        expected = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=wildtype,
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        full_columns = {
            "genotype": entry["genotype"],
            "phenotype": entry["phenotype"],
            "uncertainty": entry["uncertainty"],
        }
        frame = pd.DataFrame(full_columns)
        from_frame = gpmap.read_dataframe(frame, wildtype=wildtype)
        conftest.compare_gpmap(expected, from_frame)

        # Minimal map: genotypes only
        expected = GenotypePhenotypeMap(wildtype=wildtype,
                                        genotype=entry["genotype"])
        frame = pd.DataFrame({"genotype": entry["genotype"]})
        from_frame = gpmap.read_dataframe(frame, wildtype=wildtype)
        conftest.compare_gpmap(expected, from_frame)

        # The same minimal frame must also load when no wildtype is given
        from_frame = gpmap.read_dataframe(frame)
        conftest.compare_gpmap(expected, from_frame)

        # A frame with no genotype column must be rejected
        frame = pd.DataFrame({"phenotype": entry["phenotype"]})
        with pytest.raises(ValueError):
            gpmap.read_dataframe(frame, wildtype=wildtype)