Exemple #1
0
def gpm(test_data):
    """
    Build a GenotypePhenotypeMap from the first entry of ``test_data``.

    The entry's "genotype", "phenotype" and "wildtype" values are handed to
    the constructor as keyword arguments of the same names.
    """
    first = test_data[0]
    return GenotypePhenotypeMap(
        genotype=first["genotype"],
        phenotype=first["phenotype"],
        wildtype=first["wildtype"],
    )
Exemple #2
0
def gpm():
    """Return a three-site binary GenotypePhenotypeMap with fixed values."""
    # Genotype -> phenotype pairs, listed in the order they are passed on.
    phenotype_by_genotype = {
        "000": 0.1,
        "001": 0.1,
        "010": 0.5,
        "100": 0.4,
        "011": 0.2,
        "101": 0.8,
        "110": 0.5,
        "111": 1.0,
    }
    return GenotypePhenotypeMap(
        "000",
        list(phenotype_by_genotype.keys()),
        list(phenotype_by_genotype.values()),
        stdeviations=0.1,
    )
Exemple #3
0
    def fit_transform(self, X=None, y=None, **kwargs):
        """Fit the model, then return a new map limited to rows predicted as 1.

        ``X``, ``y`` and any extra keyword arguments are forwarded to
        ``self.fit``; ``X`` alone is forwarded to ``self.predict``. The rows of
        ``self.gpm.data`` where the prediction equals 1 are used to build the
        returned GenotypePhenotypeMap, reusing this map's wildtype and
        mutations.
        """
        self.fit(X=X, y=y, **kwargs)
        predicted = self.predict(X=X)

        # Keep only the rows whose prediction equals 1.
        selected_rows = self.gpm.data[predicted == 1]
        return GenotypePhenotypeMap.read_dataframe(
            dataframe=selected_rows,
            wildtype=self.gpm.wildtype,
            mutations=self.gpm.mutations,
        )
Exemple #4
0
def test_json(test_data, tmp_path):
    """
    Round-trip every test dataset through a json file and compare the maps.
    """
    for entry in test_data:
        original = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                        genotype=entry["genotype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Write the map out and confirm a file was produced.
        path = os.path.join(tmp_path, "tmp.json")
        original.to_json(filename=path)
        assert os.path.isfile(path)

        # Read the file back in and compare against the map that wrote it.
        restored = gpmap.read_json(filename=path)
        conftest.compare_gpmap(original, restored)
Exemple #5
0
def test_csv(test_data, tmp_path):
    """
    Round-trip every test dataset through a csv file, with and without an
    explicit wildtype, and check that site labels survive the trip.
    """
    for entry in test_data:

        original = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=entry["wildtype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Dump to csv and confirm something landed on disk.
        csv_path = os.path.join(tmp_path, "tmp.csv")
        original.to_csv(csv_path, index=False)
        assert os.path.exists(csv_path)

        # Reading back with an explicit wildtype must reproduce the map.
        reread = gpmap.read_csv(csv_path, wildtype=entry["wildtype"])
        conftest.compare_gpmap(original, reread)

        # Reading back without a wildtype should still work because the
        # wildtype is inferred.
        reread = gpmap.read_csv(csv_path)
        conftest.compare_gpmap(original, reread)

        # Rebuild the map with custom site labels ("10", "11", ...) and
        # write it over the same file.
        n_sites = len(entry["wildtype"])
        labels = [str(x) for x in range(10, 10 + n_sites)]
        labelled = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=entry["wildtype"],
                                        site_labels=labels)
        csv_path = os.path.join(tmp_path, "tmp.csv")
        labelled.to_csv(csv_path)

        reread = gpmap.read_csv(csv_path)

        # Compare labels site by site; sites with a single recorded mutation
        # state (invariant sites, whose labels are virtual) are not compared.
        for i in range(len(reread.site_labels)):
            if len(entry["mutations"][i]) != 1:
                assert reread.site_labels[i] == labelled.site_labels[i]

        # A wildtype that disagrees with the file should warn, after which
        # the site labels are expected to be sequential.
        with pytest.warns(UserWarning):
            reread = gpmap.read_csv(csv_path, wildtype=entry["mutant"])

        assert np.array_equal(reread.site_labels, range(n_sites))
Exemple #6
0
def test_add_missing_genotypes(test_data):
    """
    Check that add_missing_genotypes fills a sparse binary map out to 2^L.
    """
    # Draw 100 random binary strings of length L and drop duplicates.
    L = 8
    drawn = {
        "".join([random.choice(["0", "1"]) for _ in range(L)])
        for _ in range(100)
    }
    genotype = list(drawn)
    num_now = len(genotype)

    # The map should match the sampled genotypes before anything is added.
    gpm = GenotypePhenotypeMap(genotype=genotype,
                               phenotype=np.ones(len(genotype)))
    assert gpm.num_genotypes == num_now
    assert gpm.length == L

    num_possible = 2**L
    num_new = num_possible - num_now

    # A memory-check cutoff of zero must be rejected.
    with pytest.raises(ValueError):
        gpm.add_missing_genotypes(mem_check_cutoff=0)

    # Fill in the missing genotypes.
    gpm.add_missing_genotypes()

    # Every possible genotype should now be present.
    assert gpm.num_genotypes == num_possible

    # Added rows are flagged as inferred and have a NaN phenotype, while
    # n_mutations is populated for every row.
    frame = gpm._data
    assert np.sum(frame.inferred) == num_new
    assert np.sum(np.isnan(frame.n_mutations)) == 0
    assert np.sum(np.isnan(frame.phenotype)) == num_new
Exemple #7
0
def test_excel(test_data, tmp_path):
    """
    Round-trip every test dataset through an excel file, with and without an
    explicit wildtype, and check that site labels survive the trip.
    """
    for entry in test_data:

        original = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=entry["wildtype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Write the spreadsheet and confirm a file was produced.
        xlsx_path = os.path.join(tmp_path, "tmp.xlsx")
        original.to_excel(filename=xlsx_path)
        assert os.path.isfile(xlsx_path)

        # Reading back with an explicit wildtype must reproduce the map.
        reread = gpmap.read_excel(filename=xlsx_path,
                                  wildtype=entry["wildtype"])
        conftest.compare_gpmap(original, reread)

        # Reading back without a wildtype should still work because the
        # wildtype is inferred.
        reread = gpmap.read_excel(filename=xlsx_path)
        conftest.compare_gpmap(original, reread)

        # Rebuild the map with custom site labels ("10", "11", ...) and
        # write it over the same file.
        n_sites = len(entry["wildtype"])
        labels = [str(x) for x in range(10, 10 + n_sites)]
        labelled = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=entry["wildtype"],
                                        site_labels=labels)
        xlsx_path = os.path.join(tmp_path, "tmp.xlsx")
        labelled.to_excel(xlsx_path)

        reread = gpmap.read_excel(xlsx_path)

        # Compare labels site by site; sites with a single recorded mutation
        # state (invariant sites, whose labels are virtual) are not compared.
        for i in range(len(reread.site_labels)):
            if len(entry["mutations"][i]) != 1:
                assert reread.site_labels[i] == labelled.site_labels[i]

        # A wildtype that disagrees with the file should warn, after which
        # the site labels are expected to be sequential.
        with pytest.warns(UserWarning):
            reread = gpmap.read_excel(xlsx_path, wildtype=entry["mutant"])

        assert np.array_equal(reread.site_labels, range(n_sites))
Exemple #8
0
def test_pos(test_data):
    """
    Exercise gpgraph.pyplot.pos.flattened: argument checking, the shape of
    the returned mapping, and the ``scale`` / ``vertical`` options.
    """
    flattened_pos = gpgraph.pyplot.pos.flattened

    # The graph argument is required.
    with pytest.raises(TypeError):
        flattened_pos()

    # Something that is not a graph is rejected.
    with pytest.raises(ValueError):
        flattened_pos("stupid")

    for entry in test_data:

        gpm = GenotypePhenotypeMap(genotypes=entry["genotypes"],
                                   wildtype=entry["wildtype"],
                                   phenotypes=entry["phenotypes"])

        G = GenotypePhenotypeGraph()

        # A graph with no map loaded yet is rejected.
        with pytest.raises(ValueError):
            flattened_pos(G)

        # Load the data; the call should now succeed.
        G.add_gpm(gpm)
        ret = flattened_pos(G)

        # Sanity-check the return: a dict keyed by graph nodes, each value a
        # two-element list.
        assert type(ret) is dict
        for node, coords in ret.items():
            G.nodes[node]  # lookup fails if the key is not a node of G
            assert type(coords) is list
            assert len(coords) == 2

        # Zero and negative scales are rejected.
        for bad_scale in (0, -1):
            with pytest.raises(ValueError):
                flattened_pos(G, scale=bad_scale)

        # Positive scales are accepted.
        for good_scale in (10.0, 22.3):
            ret = flattened_pos(G, scale=good_scale)
            assert type(ret) is dict

        # Only checks that ``vertical`` is accepted, not what effect it has.
        ret = flattened_pos(G, scale=1, vertical=True)
        assert type(ret) is dict
Exemple #9
0
def test_dict(test_data):
    """
    Test conversion of a gpmap to and from a dict.
    """
    # Smallest possible map: one single-site genotype.
    gpmap.read_dict({"wildtype": "0", "data": {"genotype": ["0"]}})

    # Inputs that must be rejected: a dict with no wildtype, then a wildtype
    # whose length differs from the genotype length.
    rejected = (
        {"data": {"genotype": ["0"]}},
        {"wildtype": "01", "data": {"genotype": ["0"]}},
    )
    for bad_input in rejected:
        with pytest.raises(ValueError):
            gpmap.read_dict(bad_input)

    for entry in test_data:

        original = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                        genotype=entry["genotype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])

        # Write out as a dictionary.
        as_dict = original.to_dict()

        # Wildtype and per-site mutations metadata should match the input.
        assert as_dict["wildtype"] == entry["wildtype"]
        for i, site_mutations in enumerate(as_dict["mutations"]):
            assert np.array_equal(site_mutations, entry["mutations"][i])

        # The "data" entry comes from a pandas conversion; only its type is
        # checked here, not its contents.
        assert type(as_dict["data"]) is dict

        # Reading the dictionary back in should reproduce the map.
        restored = gpmap.read_dict(as_dict)
        conftest.compare_gpmap(original, restored)
Exemple #10
0
    def fit_transform(self, X=None, y=None, **kwargs):
        """Fit the model and return a new map carrying the transformed values.

        ``X``, ``y`` and any extra keyword arguments are forwarded to
        ``self.fit``; ``X`` and ``y`` are then forwarded to ``self.transform``.
        A new GenotypePhenotypeMap is read from ``self.gpm.data`` (reusing this
        map's wildtype and mutations) and the transform output is stored in the
        'phenotypes' column of its data before it is returned.
        """
        self.fit(X=X, y=y, **kwargs)
        transformed = self.transform(X=X, y=y)

        # Build the new map from the current map's data and metadata.
        new_map = GenotypePhenotypeMap.read_dataframe(
            dataframe=self.gpm.data,
            wildtype=self.gpm.wildtype,
            mutations=self.gpm.mutations,
        )

        new_map.data['phenotypes'] = transformed
        return new_map
Exemple #11
0
def split_gpm(gpm, idx=None, nobs=None, fraction=None):
    """Split a GenotypePhenotypeMap into a training map and a test map.

    ``gpm.data`` is divided by ``split_data`` and each part is read back into
    its own GenotypePhenotypeMap that reuses the wildtype and mutations of
    ``gpm``.

    Parameters
    ----------
    gpm : GenotypePhenotypeMap
        map whose ``data`` is split.

    idx : list
        List of indices to include in training set (forwarded to
        ``split_data``).

    nobs : int
        number of observations in training (forwarded to ``split_data``).

    fraction : float
        fraction in training set (forwarded to ``split_data``).

    Returns
    -------
    train_gpm : GenotypePhenotypeMap
        training set.

    test_gpm : GenotypePhenotypeMap
        test set.
    """
    train_frame, test_frame = split_data(gpm.data,
                                         idx=idx,
                                         nobs=nobs,
                                         fraction=fraction)

    def _as_map(frame):
        # Wrap one half of the split, reusing the parent map's metadata.
        return GenotypePhenotypeMap.read_dataframe(frame,
                                                   wildtype=gpm.wildtype,
                                                   mutations=gpm.mutations)

    train_gpm = _as_map(train_frame)
    test_gpm = _as_map(test_frame)
    return train_gpm, test_gpm
Exemple #12
0
def test_dataframe(test_data, tmp_path):
    """
    Check that maps read from a dataframe match directly constructed maps.
    """
    for entry in test_data:

        # Full map: genotype, phenotype and uncertainty columns.
        expected = GenotypePhenotypeMap(genotype=entry["genotype"],
                                        wildtype=entry["wildtype"],
                                        phenotype=entry["phenotype"],
                                        uncertainty=entry["uncertainty"])
        frame = pd.DataFrame({
            "genotype": entry["genotype"],
            "phenotype": entry["phenotype"],
            "uncertainty": entry["uncertainty"],
        })
        observed = gpmap.read_dataframe(frame, wildtype=entry["wildtype"])
        conftest.compare_gpmap(expected, observed)

        # Minimal map: genotype column only.
        expected = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                        genotype=entry["genotype"])
        frame = pd.DataFrame({"genotype": entry["genotype"]})
        observed = gpmap.read_dataframe(frame, wildtype=entry["wildtype"])
        conftest.compare_gpmap(expected, observed)

        # Same minimal frame, read without a wildtype: should still match.
        observed = gpmap.read_dataframe(frame)
        conftest.compare_gpmap(expected, observed)

        # A frame with no genotype column must be rejected.
        frame = pd.DataFrame({"phenotype": entry["phenotype"]})
        with pytest.raises(ValueError):
            gpmap.read_dataframe(frame, wildtype=entry["wildtype"])
Exemple #13
0
def test_draw_node_labels(test_data):
    """
    Smoke test: draw_node_labels runs on a graph built from each dataset.
    """
    for entry in test_data:

        graph = gpgraph.GenotypePhenotypeGraph()
        graph.add_gpm(
            GenotypePhenotypeMap(genotypes=entry["genotypes"],
                                 phenotypes=entry["phenotypes"]))

        fig, ax = plt.subplots(figsize=(10, 10))

        # Only checks that something was drawn and returned.
        labels = draw_node_labels(graph, flattened(graph), ax)
        ax.plot  # attribute access fails if ax is not an axes-like object
        len(labels)  # fails if the return value has no length
Exemple #14
0
def test_genotype_is_in():
    """
    Check genotype_is_in with a bad argument, one genotype, and a list.
    """
    members = ["SG", "PF", "SF", "PG"]
    gpm = GenotypePhenotypeMap(genotype=["SG", "PF", "SF", "PG"],
                               wildtype="SG")

    # None is not an acceptable query.
    with pytest.raises(TypeError):
        gpm.genotype_is_in(None)

    # A single genotype yields exactly True.
    assert gpm.genotype_is_in("SG") is True

    # A list yields one truthy entry per queried genotype.
    found = gpm.genotype_is_in(members)
    assert len(found) == 4
    assert sum(found) == 4
Exemple #15
0
def test__nx_wrapper(test_data):
    """
    Check that _nx_wrapper emits a UserWarning when handed an extra keyword
    argument.
    """
    graph = gpgraph.GenotypePhenotypeGraph()
    graph.add_gpm(GenotypePhenotypeMap(genotypes=["AA", "AB", "BA", "BB"]))
    layout = flattened(graph)

    fig, ax = plt.subplots(figsize=(10, 10))

    # The unknown keyword should produce a warning rather than an error.
    with pytest.warns(UserWarning):
        _nx_wrapper(nx.draw_networkx_nodes,
                    G=graph,
                    pos=layout,
                    ax=ax,
                    not_a_real_argument=10)
Exemple #16
0
def gpmap_from_gpmap(
    original_gpmap,
    new_genotypes,
    new_phenotypes,
    new_n_replicates=1,
    new_stdeviations=None,
):
    """Build a GenotypePhenotypeMap that reuses another map's metadata.

    The wildtype and mutations are taken from ``original_gpmap``; the
    genotypes, phenotypes, standard deviations and replicate counts come from
    the ``new_*`` arguments.
    """
    settings = dict(
        wildtype=original_gpmap.wildtype,
        mutations=original_gpmap.mutations,
        genotypes=new_genotypes,
        phenotypes=new_phenotypes,
        stdeviations=new_stdeviations,
        n_replicates=new_n_replicates,
    )
    return GenotypePhenotypeMap(**settings)
Exemple #17
0
def gpvolve_base():
    """
    Build a GenotypePhenotypeMSM over a three-site, two-state (A/B) map.

    Phenotypes are drawn with ``np.random.rand`` on every call, so the values
    differ between calls unless the caller fixes NumPy's global seed.

    Returns
    -------
    GenotypePhenotypeMSM
        Object constructed from the freshly built GenotypePhenotypeMap.
    """
    wildtype = "AAA"
    genotypes = ["AAA", "AAB", "ABA", "BAA", "ABB", "BAB", "BBA", "BBB"]

    # Allowed states at each of the three sites.
    mutations = {
        0: ["A", "B"],
        1: ["A", "B"],
        2: ["A", "B"],
    }

    # One random phenotype per genotype.
    phenotypes = np.random.rand(len(genotypes))

    gpm = GenotypePhenotypeMap(wildtype=wildtype,
                               genotypes=genotypes,
                               phenotypes=phenotypes,
                               log_transform=False,
                               mutations=mutations)
    return GenotypePhenotypeMSM(gpm)
Exemple #18
0
def test_getters(test_data):
    """
    Check the read-only accessors of GenotypePhenotypeMap and the neighbor
    attributes before and after get_neighbors is called.
    """
    for entry in test_data:

        gpm = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                   genotype=entry["genotype"],
                                   phenotype=entry["phenotype"],
                                   uncertainty=entry["uncertainty"])

        # Scalar getters.
        assert gpm.length == entry["length"]
        assert gpm.num_genotypes == entry["num_genotypes"]
        assert gpm.wildtype == entry["wildtype"]
        assert gpm.mutant == entry["mutant"]

        # .data and .encoding_table must hand back the stored objects
        # themselves, *not* copies.
        assert gpm.data is gpm._data
        assert gpm.encoding_table is gpm._encoding_table

        # Per-site mutations.
        for i in range(len(gpm.mutations)):
            assert np.array_equal(gpm.mutations[i], entry["mutations"][i])

        # Column getters.
        assert np.array_equal(gpm.genotype, entry["genotype"])
        assert np.array_equal(gpm.binary, entry["binary"])
        assert np.array_equal(gpm.phenotype, entry["phenotype"])
        assert np.array_equal(gpm.uncertainty, entry["uncertainty"])

        # Neighbor attributes start out unset.
        assert gpm.neighbors is None
        assert gpm.neighbor_function is None
        assert gpm.neighbor_cutoff is None

        # A default get_neighbors call populates neighbors,
        # neighbor_function and neighbor_cutoff.
        gpm.get_neighbors()
        assert isinstance(gpm.neighbors, pd.DataFrame)
        assert gpm._neighbors is gpm.neighbors
        assert gpm.neighbor_function == "hamming"
        assert gpm.neighbor_cutoff == 1

        # A second call with an explicit cutoff updates the recorded cutoff.
        gpm.get_neighbors(cutoff=17)
        assert isinstance(gpm.neighbors, pd.DataFrame)
        assert gpm._neighbors is gpm.neighbors
        assert gpm.neighbor_function == "hamming"
        assert gpm.neighbor_cutoff == 17
Exemple #19
0
def test_node_list_sanity():
    """
    Check which node lists and graph arguments check.node_list_sanity
    accepts and which it rejects with ValueError.
    """
    G = GenotypePhenotypeGraph()
    G.add_gpm(GenotypePhenotypeMap(genotypes=["AA", "AB", "AC"]))

    # Node lists that must be rejected for this three-genotype graph.
    rejected = [
        [-200, 300, 0],
        ["cow", "come", "home"],
        [-1, 0, 1],
        [[], [], 0],
        [0, 0, 0],
        [0, 1, 2, 3],
    ]
    for node_list in rejected:
        with pytest.raises(ValueError):
            check.node_list_sanity(node_list, G)

    # Node lists that must be accepted.
    for node_list in ([0, 1, 2], [0, 1], [0], [0, 2]):
        check.node_list_sanity(node_list, G)

    # A string, or the graph class itself rather than an instance, is not an
    # acceptable graph argument.
    for not_a_graph in ("G", GenotypePhenotypeGraph):
        with pytest.raises(ValueError):
            check.node_list_sanity([0, 1, 2], not_a_graph)
Exemple #20
0
def test_get_neighbors():
    """
    Check the edges produced by get_neighbors for a four-genotype map.

    Each case gives a neighbor function name, a cutoff, and the complete set
    of expected (i, j) edges. Every case is run once with ``use_cython=False``
    and once with ``use_cython=True``. The number of edges is asserted before
    the element-wise comparison, so missing or extra edges fail the test
    instead of passing silently or raising IndexError.
    """
    genotype = ["SG", "PF", "SF", "PG"]

    cases = [
        # Hamming cutoff of 1
        ("hamming", 1,
         [(0, 0), (0, 2), (2, 0), (0, 3), (3, 0), (1, 1),
          (1, 2), (2, 1), (1, 3), (3, 1), (2, 2), (3, 3)]),
        # Hamming cutoff of 2
        ("hamming", 2,
         [(0, 0), (0, 1), (1, 0), (0, 2), (2, 0), (0, 3),
          (3, 0), (1, 1), (1, 2), (2, 1), (1, 3), (3, 1),
          (2, 2), (2, 3), (3, 2), (3, 3)]),
        # Amino acid codon cutoff of 1
        ("codon", 1,
         [(0, 0), (0, 3), (3, 0), (1, 1), (1, 2), (2, 1), (2, 2),
          (3, 3)]),
    ]

    for use_cython in [False, True]:

        print("Use cython?", use_cython)

        for function, cutoff, expected_edges in cases:

            # Fresh map (and fresh genotype list) for every case.
            gpm = GenotypePhenotypeMap(genotype=list(genotype))
            gpm.get_neighbors(function, cutoff=cutoff, use_cython=use_cython)

            observed = sorted(gpm._neighbors.loc[:, "edge"])
            expected = sorted(expected_edges)

            # Guard against missing or extra edges before comparing pairs.
            assert len(observed) == len(expected)
            for exp_edge, obs_edge in zip(expected, observed):
                assert np.array_equal(exp_edge, obs_edge)
Exemple #21
0
def test_sample_genotypes():
    """
    Exercise sample_genotypes: argument validation, sampling by count and by
    fraction, and the keep_genotypes / keep_wt options.
    """
    gpm = GenotypePhenotypeMap(genotype=["SG", "PF", "SF", "PG"],
                               wildtype="SG")

    # These combinations of num_to_return and fx_to_return are rejected.
    rejected_pairs = [(4, 1), (None, None), (0.5, 0.5)]
    for num, fx in rejected_pairs:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(num_to_return=num, fx_to_return=fx)

    # Bad num_to_return values.
    for bad in [-1, "stupid", 5, [], {"test": 1}]:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(num_to_return=bad)

    # Asking for 1..4 genotypes returns a new map with that many rows.
    for count in range(1, 5):
        sampled = gpm.sample_genotypes(num_to_return=count)
        assert sampled is not gpm
        assert len(sampled.data) == count

    # Bad fx_to_return values.
    for bad in [-1, "stupid", 1.1, [], {"test": 1}]:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(fx_to_return=bad)

    # Fractions of a four-genotype map give 1, 2, 3 and 4 rows.
    for expected_rows, fx in enumerate([0.25, 0.5, 0.75, 1.0], start=1):
        sampled = gpm.sample_genotypes(fx_to_return=fx)
        assert sampled is not gpm
        assert len(sampled.data) == expected_rows

    # A fraction of 0.1 still returns one row.
    sampled = gpm.sample_genotypes(fx_to_return=0.1)
    assert len(sampled.data) == 1

    # Bad keep_genotypes values.
    bad_keep = ["stupid", 5, ["not", "really"], [[12], [15, 16]], np.zeros(5)]
    for bad in bad_keep:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(fx_to_return=1, keep_genotypes=bad)

    # Two genotypes to keep, but a fraction that yields only one row.
    with pytest.raises(ValueError):
        gpm.sample_genotypes(fx_to_return=0.25, keep_genotypes=["SG", "PF"])

    # Get exactly those two keep_genotypes.
    sampled = gpm.sample_genotypes(fx_to_return=0.5,
                                   keep_genotypes=["SF", "PG"])
    assert np.sum(sampled.data.genotype.isin(["SF", "PG"])) == 2

    # The same request with keep_wt=True is rejected at this fraction.
    with pytest.raises(ValueError):
        sampled = gpm.sample_genotypes(fx_to_return=0.5,
                                       keep_genotypes=["SF", "PG"],
                                       keep_wt=True)

    # At 0.75 both kept genotypes and the wildtype are returned.
    sampled = gpm.sample_genotypes(fx_to_return=0.75,
                                   keep_genotypes=["SF", "PG"],
                                   keep_wt=True)
    assert np.sum(sampled.data.genotype.isin(["SG", "SF", "PG"])) == 3

    # A plain 0.75 sample gives three distinct genotypes.
    sampled = gpm.sample_genotypes(fx_to_return=0.75)
    assert len(sampled.data) == 3
    assert len(np.unique(sampled.data.genotype)) == 3
Exemple #22
0
 def read_csv(cls, fname, wildtype, mutations=None):
     """Build an instance of ``cls`` from a csv file.

     The file is read with ``GenotypePhenotypeMap.read_csv`` (forwarding
     ``wildtype`` and ``mutations``) and the resulting map is passed to
     ``cls``.
     """
     return cls(
         GenotypePhenotypeMap.read_csv(fname,
                                       wildtype=wildtype,
                                       mutations=mutations))
Exemple #23
0
def test__check_internal_consistency():
    """
    Test private _check_internal_consistency method.

    Each stanza changes a private attribute (or a cell of ``_data``) directly,
    bypassing the public setter, then asserts that ``_data`` still holds the
    old derived values while the public ``.data`` / ``.binary`` / ``.name``
    getters return updated ones. Statement order matters: ``_data`` is read
    before the public getter in every stanza.
    """

    # Bypass wildtype setter. Direct access to data frame, no change in binary.
    # Access via .data getter and voila! should change.
    gpm = GenotypePhenotypeMap(["AA", "AB", "BA", "BB"])
    assert np.array_equal(["00", "01", "10", "11"], gpm.binary)

    gpm._wildtype = "BB"
    # Private frame still shows the old encoding ...
    assert np.array_equal(["00", "01", "10", "11"], gpm._data.loc[:, "binary"])
    # ... while the public getters show the encoding relative to "BB".
    assert np.array_equal(["11", "10", "01", "00"], gpm.data.loc[:, "binary"])
    assert np.array_equal(["11", "10", "01", "00"], gpm.binary)

    # Bypass site_labels setter. Direct access to data frame, no change in names.
    # Access via .data getter and voila! should change.
    gpm = GenotypePhenotypeMap(["AA", "AB", "BA", "BB"])
    assert np.array_equal(["wildtype", "A1B", "A0B", "A0B/A1B"], gpm.name)

    gpm._site_labels = [1, 2]
    # Private frame keeps the names built from the old labels ...
    assert np.array_equal(["wildtype", "A1B", "A0B", "A0B/A1B"],
                          gpm._data.loc[:, "name"])
    # ... while the public getters use the new labels 1 and 2.
    assert np.array_equal(["wildtype", "A2B", "A1B", "A1B/A2B"],
                          gpm.data.loc[:, "name"])
    assert np.array_equal(["wildtype", "A2B", "A1B", "A1B/A2B"], gpm.name)

    # Bypass mutations setter. Direct access to data frame, no change in binary.
    # Access via .data getter and voila! should change.
    gpm = GenotypePhenotypeMap(["AA", "AB", "BA", "BB"])
    assert np.array_equal(["00", "01", "10", "11"], gpm.binary)

    gpm._mutations = [["A", "B", "C"], ["A", "B"]]
    # Private frame still has two-character binary strings ...
    assert np.array_equal(["00", "01", "10", "11"], gpm._data.loc[:, "binary"])
    # ... while the public getters return three-character strings after the
    # first site gained a third state.
    assert np.array_equal(["000", "001", "100", "101"], gpm.data.loc[:,
                                                                     "binary"])
    assert np.array_equal(["000", "001", "100", "101"], gpm.binary)

    # Change a genotype to have a state not previously seen. Direct access to
    # gpm._data or gpm._mutations doesn't catch it. But gpm.data does.
    gpm = GenotypePhenotypeMap(["AA", "AB", "BA", "BB"])
    assert np.array_equal(["00", "01", "10", "11"], gpm.binary)
    gpm._data.loc[0, "genotype"] = "CA"
    # Private frame is unchanged; the public getter re-encodes with the new
    # state "C" at the first site.
    assert np.array_equal(["00", "01", "10", "11"], gpm._data.loc[:, "binary"])
    assert np.array_equal(["010", "001", "100", "101"], gpm.binary)
Exemple #24
0
def test_add_model(test_data):
    """
    Exercise GenotypePhenotypeGraph.add_model with built-in model names,
    custom callables, bad inputs, and the no-model defaults.
    """
    for entry in test_data:

        gpm = GenotypePhenotypeMap(genotypes=entry["genotypes"],
                                   wildtype=entry["wildtype"],
                                   phenotypes=entry["phenotypes"],
                                   bad_pheno=entry["phenotypes"])

        # Flip the sign of the extra column so it holds negative phenotypes
        # (which models except ratio will not like).
        gpm.data.loc[:, "bad_pheno"] = gpm.data.loc[:, "bad_pheno"] * -1

        G = GenotypePhenotypeGraph()

        # A model cannot be added before a map has been added.
        with pytest.raises(ValueError):
            G.add_model()

        # Use basic hamming connectivity model.
        G.add_gpm(gpm, "hamming", 1)

        # Models that need no extra arguments.
        for model_name in ("ratio", "sswm"):
            G.add_model(column="phenotypes", model=model_name)

        # Models that take a population size, over a range of sizes.
        sizes = [1, 3, 10, 30, 100, 300, 1000, 3000, 10000]
        for model_name in ("moran", "mcclandish"):
            for size in sizes:
                G.add_model(column="phenotypes",
                            model=model_name,
                            population_size=size)

        # Negative phenotypes are rejected by sswm ...
        with pytest.raises(ValueError):
            G.add_model(column="bad_pheno", model="sswm")

        # ... but accepted by ratio.
        G.add_model(column="bad_pheno", model="ratio")

        # Calls that must each raise ValueError, in order: string data,
        # missing required arg, nonsensical required arg, undefined model,
        # unrecognized arg for the model.
        rejected_calls = [
            dict(column="genotypes", model="sswm"),
            dict(column="phenotypes", model="mcclandish"),
            dict(column="phenotypes", model="mcclandish", population_size=-5),
            dict(column="phenotypes", model="not_a_real_model"),
            dict(column="phenotypes", model="sswm", population_size=10),
        ]
        for call_kwargs in rejected_calls:
            with pytest.raises(ValueError):
                G.add_model(**call_kwargs)

        # A callable taking a single argument is not a compatible model.
        def bad_model(v1):
            return 5

        with pytest.raises(ValueError):
            G.add_model(column="phenotypes", model=bad_model)

        # A callable taking two arguments is accepted.
        def okay_model(v1, v2):
            return 5

        G.add_model(column="phenotypes", model=okay_model)

        # A callable with an extra keyword is accepted when that keyword is
        # supplied to add_model.
        def okay_model(v1, v2, needed_kwarg):
            return 5

        G.add_model(column="phenotypes", model=okay_model, needed_kwarg=5)

        # The custom model always returns 5, so every edge should carry it.
        for edge in G.edges:
            assert G.edges[edge]["prob"] == 5

        # No model given: every edge probability should be 1.
        G.add_model(column="phenotypes")
        for edge in G.edges:
            assert G.edges[edge]["prob"] == 1

        # Nothing given at all: every edge probability should be 1.
        G.add_model()
        for edge in G.edges:
            assert G.edges[edge]["prob"] == 1

        # Model but no data column: prob should all be 0 for sswm because
        # fitness is the same for all genotypes.
        G.add_model(model="sswm")
        for edge in G.edges:
            assert G.edges[edge]["prob"] == 0
def load(name):
    """Load a provided dataset.

    ``name`` is turned into the file name ``<name>.json``, resolved through
    ``get_dataset_path``, and read with ``GenotypePhenotypeMap.read_json``.
    """
    dataset_path = get_dataset_path(f"{name}.json")
    return GenotypePhenotypeMap.read_json(dataset_path)
Exemple #26
0
 def read_json(cls, fname):
     """
     Build an instance of ``cls`` from a json file.

     The file is read with ``GenotypePhenotypeMap.read_json`` and the
     resulting map is passed to ``cls``.
     """
     return cls(GenotypePhenotypeMap.read_json(fname))
Exemple #27
0
def test_wildtype_setter(test_data):
    """
    Exercise the wildtype setter: a no-op set, a series of rejected values,
    and a real change that should flip n_mutations and the binary encoding.
    """
    for entry in test_data:

        gpm = GenotypePhenotypeMap(wildtype=entry["wildtype"],
                                   genotype=entry["genotype"])

        # Setting the same wildtype again should change nothing.
        gpm.wildtype = entry["wildtype"]
        assert np.array_equal(gpm.binary, entry["binary"])
        assert gpm.mutant == entry["mutant"]

        # Rejected literals, in order: wrong length, not iterable, empty.
        for bad_wildtype in ("1", 0, []):
            with pytest.raises(ValueError):
                gpm.wildtype = bad_wildtype

        # Right length, but states not seen in the test datasets.
        with pytest.raises(ValueError):
            gpm.wildtype = "Z" * len(gpm.wildtype)

        # Wildtype given as a list of characters rather than a str.
        with pytest.raises(ValueError):
            gpm.wildtype = list(gpm.wildtype)

        # A valid new wildtype should be accepted ...
        gpm.wildtype = entry["mutant"]

        # ... and actually stored.
        assert gpm.wildtype == entry["mutant"]

        # The mutant should differ from the wildtype at every variable site.
        for i in range(len(gpm.wildtype)):
            if len(entry["mutations"][i]) == 1:
                continue
            assert gpm.mutant[i] != gpm.wildtype[i]

        # n_mutations should equal the number of positions at which each
        # genotype differs from the new wildtype.
        for i in range(len(gpm.genotype)):
            n_differences = sum(
                1 for j in range(len(gpm.genotype[i]))
                if gpm.genotype[i][j] != gpm.wildtype[j])
            assert gpm.n_mutations[i] == n_differences

        # The binary column should agree with a fresh encoding from the
        # current encoding table and differ from the original binary. This
        # does *not* check that the encoding itself is accurate; that is
        # covered by test_utils.test_genotypes_to_binary.
        recomputed = utils.genotypes_to_binary(gpm.genotype,
                                               gpm.encoding_table)
        assert np.array_equal(recomputed, gpm.binary)
        assert not np.array_equal(recomputed, entry["binary"])
Exemple #28
0
Use a linear, logistic regression model to estimate the positive/negative effects
of mutations.
"""

# Imports
import matplotlib.pyplot as plt

from gpmap import GenotypePhenotypeMap
from epistasis.models import EpistasisLogisticRegression
from epistasis.pyplot import plot_coefs

# The data
wildtype = "000"
genotypes = [
    "000", "001", "010", "011",
    "100", "101", "110", "111",
]
phenotypes = [
    0.366, -0.593, 1.595, -0.753,
    0.38, 1.296, 1.025, -0.519,
]

# Bundle the wildtype, genotypes and phenotypes into a map.
gpm = GenotypePhenotypeMap(wildtype, genotypes, phenotypes)

# Threshold value handed to the logistic regression model.
threshold = 1.0

# Create the model and attach the map to it.
model = EpistasisLogisticRegression(threshold=threshold)
model.add_gpm(gpm)

# Fit the model to the attached map.
model.fit()

# Plot the fitted coefficients and display the figure.
fig, ax = plot_coefs(model, figsize=(1, 3))
plt.show()