コード例 #1
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test__genotypes_to_X(test_data):
    """
    Check that _genotypes_to_X rejects malformed genotype lists and returns
    matrices with one row per genotype holding only the values permitted
    for the requested model type.
    """

    # Build a map from the first data set to probe the error handling
    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                     phenotype=first["phenotype"])

    # Every genotype listed twice
    duplicated = list(gpm.genotype) * 2

    # One extra genotype that is not in the map
    unknown = [*gpm.genotype, "stupid"]

    for bad in (duplicated, unknown):
        with pytest.raises(ValueError):
            models.base._genotypes_to_X(bad, gpm, order=1, model_type="local")

    # Values each model type is allowed to place in the matrix
    allowed = {"local": {0, 1}, "global": {-1, 1}}

    # Sample through every data set / order / model type combination
    for entry in test_data:

        gpm = gpmap.GenotypePhenotypeMap(genotype=entry["genotype"],
                                         phenotype=entry["phenotype"])

        for order in range(1, gpm.length + 1):
            for model_type in ("local", "global"):
                X = models.base._genotypes_to_X(gpm.genotype,
                                                gpm,
                                                order=order,
                                                model_type=model_type)
                observed_values = set(np.unique(X))
                assert X.shape[0] == len(gpm.genotype)
                assert observed_values.issubset(allowed[model_type])
コード例 #2
0
def test_synchronize(test_data):
    """
    Check that synchronize() makes the graph agree with its attached map.

    Nodes are compared against gpm.data and edges against gpm.neighbors,
    first after editing the graph directly through the DiGraph parent and
    then after editing the map's private data frames.
    """

    def make_graph(entry):
        # Fresh graph holding a map built from one test data set
        graph = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(entry["genotype"],
                                         phenotype=entry["phenotype"])
        graph.add_gpm(gpm)
        return graph

    # Should work even without gpm
    empty = gpmap.GenotypePhenotypeGraph()
    assert empty.synchronize() is None

    # ---- node synchronization ----
    for entry in test_data:

        graph = make_graph(entry)
        expected_nodes = len(entry["genotype"])

        # Go through the parent class so the removal is observed directly;
        # the GenotypePhenotypeGraph.nodes property would re-sync first.
        parent = super(gpmap.GenotypePhenotypeGraph, graph)
        parent.remove_node(0)
        assert len(parent.nodes) == expected_nodes - 1

        # gpm.data always wins, so the node should come back
        graph.synchronize()
        assert len(graph.nodes) == expected_nodes

        # Drop the first row straight out of gpm.data
        keep = np.arange(1, len(graph.gpm.data), dtype=int)
        graph.gpm._data = graph.gpm.data.loc[keep, :]

        # The graph should now lose a node
        graph.synchronize()
        assert len(graph.nodes) == expected_nodes - 1

    # ---- edge synchronization ----
    for entry in test_data:

        graph = make_graph(entry)
        num_neighbors = len(graph.gpm.neighbors)

        # Same trick for edges: remove via the parent and count via the
        # parent so no implicit sync hides the removal.
        parent = super(gpmap.GenotypePhenotypeGraph, graph)
        first_edge = list(graph.edges)[0]
        parent.remove_edge(*first_edge)
        assert len(parent.edges) == num_neighbors - 1

        # gpm.neighbors always wins, so the edge should come back
        graph.synchronize()
        assert len(graph.edges) == num_neighbors

        # Drop the last row straight out of gpm.neighbors
        keep = np.arange(len(graph.gpm.neighbors) - 1, dtype=int)
        graph.gpm._neighbors = graph.gpm.neighbors.loc[keep, :]

        # The graph should now lose an edge
        graph.synchronize()
        assert len(graph.edges) == num_neighbors - 1
コード例 #3
0
def test_constructor(test_data):
    """
    Check GenotypePhenotypeGraph construction: defaults on an empty graph,
    rejection of bad gpm / edge_weight_column arguments, and edge weights
    when a valid map is supplied.
    """

    graph = base.GenotypePhenotypeGraph()
    assert isinstance(graph, nx.DiGraph)
    assert graph.gpm is None

    # All four option dictionaries should exist and be populated
    option_dicts = (graph.node_options, graph.edge_options,
                    graph.node_label_options, graph.edge_label_options)
    for options in option_dicts:
        assert type(options) is dict
        assert len(options) > 0

    # NOTE(review): both lookups go through node_options; the "edgelist"
    # check may have been intended for edge_options -- confirm.
    for absent_key in ("nodelist", "edgelist"):
        with pytest.raises(KeyError):
            graph.node_options[absent_key]

    default_size = graph._default_node_size
    assert graph.node_options["node_size"] == default_size
    assert graph.edge_options["node_size"] == default_size
    assert graph.edge_options["arrows"] == False

    # Test bad gpm inputs
    for bad_gpm in ("stupid", 1, [], (1,), 1.1):
        with pytest.raises(TypeError):
            graph = base.GenotypePhenotypeGraph(gpm=bad_gpm)

    # Send in good genotype phenotype map. does not have weights.
    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(first["genotype"])
    graph = base.GenotypePhenotypeGraph(gpm=gpm)
    graph.gpm.data
    graph.gpm.neighbors

    # Pass bad edge weight columns (a missing name, then a non-string)
    for bad_column in ("not_good", 1.1):
        gpm = gpmap.GenotypePhenotypeMap(first["genotype"])
        with pytest.raises(ValueError):
            graph = base.GenotypePhenotypeGraph(gpm=gpm,
                                                edge_weight_column=bad_column)

    # Should work and have weighted edges
    gpm = gpmap.GenotypePhenotypeMap(first["genotype"])
    graph = base.GenotypePhenotypeGraph(gpm=gpm)
    for edge in graph.edges:
        graph.edges[edge[0], edge[1]]["weight"]
コード例 #4
0
def test_nodes_getter(test_data):
    """
    Check that the nodes getter always reflects gpm.data, whichever side
    (graph or map) was modified beforehand.
    """

    # Should work even without gpm
    bare = gpmap.GenotypePhenotypeGraph()
    assert len(bare.nodes) == 0

    for entry in test_data:

        # Make map and attach it to a fresh graph
        graph = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(entry["genotype"],
                                         phenotype=entry["phenotype"])
        graph.add_gpm(gpm)

        total = len(entry["genotype"])

        # Remove a node through the DiGraph parent. The count seen through
        # the getter must not change because the getter re-syncs the nodes.
        super(gpmap.GenotypePhenotypeGraph, graph).remove_node(0)
        assert len(graph.nodes) == total

        # Drop the last row straight out of gpm.data, keeping a backup
        backup = graph.gpm._data.copy()
        keep = np.arange(len(graph.gpm.data) - 1, dtype=int)
        graph.gpm._data = graph.gpm.data.loc[keep, :]

        # Should now lose a node.
        assert len(graph.nodes) == total - 1

        # Restore the data frame; the node should reappear.
        graph.gpm._data = backup
        assert len(graph.nodes) == total
コード例 #5
0
def test_add_remove_node_cmap(test_data):
    """
    Test both add_node_cmap and remove_node_cmap methods.
    """

    def check_node_color(graph, expected_vmin, expected_vmax):
        # node_color should be a ("_gpm", column, cmap, vmin, vmax) tuple
        node_color = copy.deepcopy(graph.node_options)["node_color"]
        assert type(node_color) is tuple
        assert node_color[0] == "_gpm"
        assert node_color[1] == "phenotype"
        assert type(node_color[2]) is type(matplotlib.cm.get_cmap("plasma"))
        assert node_color[3] == expected_vmin
        assert node_color[4] == expected_vmax

    # A cmap cannot be added before a map is attached
    no_map = gpmap.GenotypePhenotypeGraph()
    with pytest.raises(RuntimeError):
        no_map.add_node_cmap("test")

    for entry in test_data:
        graph = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(entry["genotype"],
                                         phenotype=entry["phenotype"])

        graph.add_gpm(gpm)

        # Unknown data column
        with pytest.raises(KeyError):
            graph.add_node_cmap(data_column="not_a_column")

        # Unknown colormap name
        with pytest.raises(ValueError):
            graph.add_node_cmap(data_column="phenotype", cmap="not_a_cmap")

        # Colormap given by name; limits default to the data range
        graph.add_node_cmap(data_column="phenotype", cmap="plasma")
        check_node_color(graph,
                         np.min(entry["phenotype"]),
                         np.max(entry["phenotype"]))

        # Now pass in cmap as cm object
        graph = gpmap.GenotypePhenotypeGraph()
        graph.add_gpm(gpm)

        cmap = matplotlib.cm.get_cmap("plasma")
        graph.add_node_cmap(data_column="phenotype", cmap=cmap)
        check_node_color(graph,
                         np.min(entry["phenotype"]),
                         np.max(entry["phenotype"]))

        # Explicit limits override the data range
        graph.add_node_cmap(data_column="phenotype", cmap=cmap,
                            vmin=5, vmax=10)
        check_node_color(graph, 5, 10)

        # Removing the cmap should clear its keys and reset the color
        graph.remove_node_cmap()
        for removed in ("vmin", "vmax", "cmap"):
            with pytest.raises(KeyError):
                graph.node_options[removed]
        assert graph.node_options["node_color"] == "gray"
コード例 #6
0
def test_set_edge_label_options(test_data):
    """
    Check that set_edge_label_options updates known keys, one at a time and
    all together, and raises KeyError for an unknown key.
    """

    graph = base.GenotypePhenotypeGraph()

    # Snapshot of the options available on a fresh graph
    defaults = copy.deepcopy(graph.edge_label_options)

    # Set individually to 1.
    for key in defaults:
        graph.set_edge_label_options(**{key: 1})
        assert graph.edge_label_options[key] == 1

    # Set all at once and make sure they match
    graph.set_edge_label_options(**defaults)
    for key, value in defaults.items():
        assert graph.edge_label_options[key] == value

    # pass bad options
    for bad_key in ("not_a_key",):
        with pytest.raises(KeyError):
            graph.set_edge_label_options(**{bad_key: 1})

    # test exotic edge_label setting
    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(first["genotype"])
コード例 #7
0
def test_encoding_to_sites(test_data):
    """
    Check that encoding_to_sites returns the expected number of terms for
    every order and for start_order 0 and 1.
    """

    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                     phenotype=first["phenotype"])
    gpm.encoding_table

    # Genotype length, taken from the first genotype string
    L = len(first["genotype"][0])

    for start_order in (0, 1):
        for order in range(1, L + 1):
            sites = epistasis.mapping.encoding_to_sites(
                order=order,
                encoding_table=gpm.encoding_table,
                start_order=start_order)

            # Make sure we generate the right number of terms (n choose k
            # for each order, plus 1 if we are starting at order 0).
            expected = sum(scipy.special.comb(L, k)
                           for k in range(1, order + 1))
            if start_order == 0:
                expected += 1

            assert len(sites) == expected
コード例 #8
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test__thetas_arghandler(test_data):
    """
    Check _thetas argument handling: it fails before a fit, returns the
    fitted values afterwards, rejects bad input and passes a valid list
    through.
    """

    model = models.linear.EpistasisLinearRegression()

    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                     coolness=first["phenotype"],
                                     uncertainty=first["phenotype"])
    model.add_gpm(gpm,
                  phenotype_column="coolness",
                  uncertainty_column="uncertainty")

    # No thetas calculated yet
    with pytest.raises(RuntimeError):
        model._thetas()
    model.fit()

    # Thetas are available once the model has been fit
    fitted = model._thetas()
    assert len(fitted) == 4

    # pass in general badness
    for b in (np.ones((1, 1, 1)), [], "stupid", 1, 1.1, ()):
        with pytest.raises(TypeError):
            print(f"trying {b}")
            model._thetas(b)

    # A one-element list of floats comes back unchanged
    assert np.array_equal(model._thetas([1.0]), [1.0])
コード例 #9
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test__lnprior(test_data):
    """
    Check _lnprior argument handling: it fails without a map, defaults to
    zeros (one per genotype) once a map is attached, rejects bad input and
    passes a valid list through.
    """

    model = models.linear.EpistasisLinearRegression()

    # No map attached yet
    with pytest.raises(ValueError):
        model._lnprior()

    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                     coolness=first["phenotype"],
                                     uncertainty=first["phenotype"])
    model.add_gpm(gpm,
                  phenotype_column="coolness",
                  uncertainty_column="uncertainty")

    # Default prior is an array of zeros
    default_prior = model._lnprior()
    assert np.array_equal(default_prior, np.zeros(len(first["genotype"])))

    # pass in general badness
    for b in (np.ones((1, 1, 1)), [], "stupid", 1, 1.1, ()):
        with pytest.raises(TypeError):
            print(f"trying {b}")
            model._lnprior(b)

    # A one-element list of floats comes back unchanged
    assert np.array_equal(model._lnprior([1.0]), [1.0])
コード例 #10
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test_abstractmodel_predict_to_df(test_data):
    """
    Test basic functionality. Real test of values will be done on .predict
    for subclasses.
    """

    model = models.linear.EpistasisLinearRegression()
    first = test_data[0]
    genotypes = first["genotype"]
    gpm = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                     phenotype=first["phenotype"])
    model.add_gpm(gpm)

    # This should fail -- no fit run
    with pytest.raises(Exception):
        model.predict_to_df()

    model.fit()

    # This should work: one row per genotype in a plain DataFrame
    frame = model.predict_to_df()
    assert type(frame) is pd.DataFrame
    assert len(frame) == len(genotypes)

    # Create and fit a new model.
    model = models.linear.EpistasisLinearRegression()
    gpm = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                     phenotype=first["phenotype"])

    # No gpm added -- should fail
    with pytest.raises(RuntimeError):
        model.predict_to_df()

    model.add_gpm(gpm)
    model.fit()

    # A single genotype gives a single row
    assert len(model.predict_to_df(genotypes=genotypes[0])) == 1

    # Anything that is not a known genotype (or list of them) is rejected
    for b in (1, {}, [1, 2], "STUPID", ["STUPID", "IS", "REAL"]):
        with pytest.raises(ValueError):
            print(f"Trying bad genotypes {b}")
            model.predict_to_df(genotypes=b)

    # A slice of three genotypes gives three rows
    assert len(model.predict_to_df(genotypes=genotypes[:3])) == 3
コード例 #11
0
def test_gpm_getter():
    """
    Check that the graph's gpm getter is None until a map is added, then
    returns that exact map object.
    """

    graph = base.GenotypePhenotypeGraph()
    assert graph.gpm is None

    attached = gpmap.GenotypePhenotypeMap(["AA"])
    graph.add_gpm(attached)
    assert graph.gpm is attached
コード例 #12
0
def test_edge_weight_column_getter():
    """
    Check that the edge_weight_column getter returns whatever was last set,
    whether through the property setter or the constructor argument.
    """

    gpm = gpmap.GenotypePhenotypeMap(genotype=["00","11"])
    G = gpmap.GenotypePhenotypeGraph(gpm)

    # Set through the property
    G.edge_weight_column = "weight"
    assert G.edge_weight_column == "weight"

    # Clearing it should give back None (identity check, not ==)
    G.edge_weight_column = None
    assert G.edge_weight_column is None

    # Add a custom column to the neighbors frame and select it at
    # construction time
    gpm = gpmap.GenotypePhenotypeMap(genotype=["00","01"])
    gpm.get_neighbors()
    gpm.neighbors.loc[:,"test"] = np.ones(len(gpm.neighbors))

    G = gpmap.GenotypePhenotypeGraph(gpm,edge_weight_column="test")
    assert G.edge_weight_column == "test"
コード例 #13
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test__X_arghandler(test_data):
    """
    Check _X argument handling: caching in _previous_X when called with no
    arguments, no cache update when genotypes or an array are passed, and
    ValueError for unusable input.
    """

    model = models.linear.EpistasisLinearRegression()

    # No map attached yet
    with pytest.raises(ValueError):
        model._X()

    first = test_data[0]
    genotypes = first["genotype"]
    gpm = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                     phenotype=first["phenotype"],
                                     uncertainty=first["phenotype"])
    model.add_gpm(gpm)

    # Make sure calling _X() naked-ly populates previous_X
    assert model._previous_X is None
    cached = model._X()
    assert model._previous_X is cached

    # If we access after having run, make sure X is the same object
    assert cached is model._X()

    # Should wipe out previous_X and force recalculation.
    model.add_gpm(gpm)
    assert cached is not model._X()

    # Explicit genotypes -- a single genotype, a list of two and an array
    # of two -- should all work and should not update _previous_X
    explicit_cases = (
        (genotypes[0], 1),
        (genotypes[0:2], 2),
        (np.array(genotypes[0:2]), 2),
    )
    for requested, expected_rows in explicit_cases:
        X = model._X(requested)
        assert len(X) == expected_rows
        assert X is not model._previous_X

    # Just keep the array, do not update previous_X
    hack = np.ones((1, 1))
    X = model._X(data=hack)
    assert X is hack
    assert X is not model._previous_X

    # pass in bad genotypes
    for unknown in ("NOT_A_GENOTYPE", [genotypes[0], "NOT_A_GENOTYPE"]):
        with pytest.raises(ValueError):
            X = model._X(unknown)

    # pass in general badness
    for bad in (np.ones((1, 1, 1)), [], "stupid", 1, 1.1, ()):
        with pytest.raises(ValueError):
            model._X(bad)
コード例 #14
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test_gpm_getter(test_data):
    """
    Check that the model's gpm getter is None until a map is added, then
    returns that exact map object.
    """

    first = test_data[0]
    attached = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                          phenotype=first["phenotype"])

    model = models.linear.EpistasisLinearRegression()
    assert model.gpm is None

    model.add_gpm(attached)
    assert model.gpm is attached
コード例 #15
0
def test__encode_vectors(test_data):
    """
    Check that _encode_vectors returns the reference encoding stored with
    each data set for both model types, and rejects an unknown model type.
    """

    for entry in test_data:

        for model_type in ("global", "local"):

            gpm = gpmap.GenotypePhenotypeMap(genotype=entry["genotype"])
            binary = gpm.binary

            vectors = mm._encode_vectors(binary, model_type=model_type)

            # Make sure output is what is expected: an array with one row
            # per genotype and one more column than the binary string length
            assert type(vectors) is np.ndarray
            assert vectors.shape[0] == len(gpm.binary)
            assert vectors.shape[1] == len(gpm.binary[0]) + 1
            assert np.array_equal(entry[f"{model_type}_encoding"], vectors)

        # Send bad model type
        gpm = gpmap.GenotypePhenotypeMap(genotype=entry["genotype"])
        with pytest.raises(ValueError):
            mm._encode_vectors(gpm.binary, model_type="stupid")
コード例 #16
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test_results_getter(test_data):
    """
    Check that results is None before fitting and a DataFrame afterwards.
    """

    first = test_data[0]
    model = None  # placeholder so the construction order below is explicit

    # Build the map first, then the model, then attach
    gpm = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                     phenotype=first["phenotype"])
    model = models.linear.EpistasisLinearRegression()
    model.add_gpm(gpm)

    assert model.results is None

    model.fit()
    assert isinstance(model.results, pd.DataFrame)
コード例 #17
0
def test_get_neighbors():
    """
    Check get_neighbors_cython.get_neighbors: argument validation and the
    exact (source, target) edge list for hamming and codon neighbor
    functions on a four-genotype map.
    """

    gpm = gpmap.GenotypePhenotypeMap(genotype=["SG", "PF", "SF", "PG"])
    genotype = gpm._data.loc[:, "genotype"]

    def check_edges(expected, neighbor_function, cutoff):
        # Compare the returned edges against `expected`, in order. The
        # lengths are asserted first so the comparison cannot pass
        # vacuously when too few (or no) edges come back.
        source, target = get_neighbors_cython.get_neighbors(
            genotype, neighbor_function, cutoff=cutoff)
        assert len(source) == len(target)
        assert len(source) == len(expected)
        for i, edge in enumerate(expected):
            assert np.array_equal(edge, (source[i], target[i]))

    # Must pass genotype
    with pytest.raises(TypeError):
        get_neighbors_cython.get_neighbors()

    # should run
    get_neighbors_cython.get_neighbors(genotype)

    # check neighbor function sanity
    with pytest.raises(ValueError):
        get_neighbors_cython.get_neighbors(genotype,
                                           neighbor_function="not_real")

    # check cutoff sanity
    for bad_cutoff in (-2, "stupid", [], None):
        with pytest.raises(ValueError):
            get_neighbors_cython.get_neighbors(genotype, cutoff=bad_cutoff)

    # Hamming distance of one
    check_edges([(0, 0), (0, 2), (2, 0), (0, 3), (3, 0), (1, 1), (1, 2),
                 (2, 1), (1, 3), (3, 1), (2, 2), (3, 3)],
                "hamming", 1)

    # Hamming distance of two
    check_edges([(0, 0), (0, 1), (1, 0), (0, 2), (2, 0), (0, 3), (3, 0),
                 (1, 1), (1, 2), (2, 1), (1, 3), (3, 1), (2, 2), (2, 3),
                 (3, 2), (3, 3)],
                "hamming", 2)

    # Codon (amino acid) distance of one
    check_edges([(0, 0), (0, 3), (3, 0), (1, 1), (1, 2), (2, 1), (2, 2),
                 (3, 3)],
                "codon", 1)
コード例 #18
0
def test_edges_getter(test_data):
    """
    Check that the edges getter always reflects gpm.neighbors and that
    edge weights appear and disappear with edge_weight_column.
    """

    # Should work even without gpm
    bare = gpmap.GenotypePhenotypeGraph()
    assert len(bare.edges) == 0

    for entry in test_data:

        # Make map and attach it to a fresh graph
        graph = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(entry["genotype"],
                                         phenotype=entry["phenotype"])
        graph.add_gpm(gpm)

        # Keep a full copy of the neighbors frame to restore later
        full_neighbors = gpm.neighbors.copy()

        num_edges = len(graph.gpm.neighbors)

        # Remove an edge through the DiGraph parent. The count seen through
        # the getter must not change because the getter re-syncs the edges.
        parent = super(gpmap.GenotypePhenotypeGraph, graph)
        first_edge = list(parent.edges)[0]
        super(gpmap.GenotypePhenotypeGraph, graph).remove_edge(*first_edge)
        assert len(graph.edges) == num_edges

        # Drop the last row straight out of gpm.neighbors
        keep = np.arange(len(graph.gpm.neighbors) - 1, dtype=int)
        graph.gpm._neighbors = graph.gpm.neighbors.loc[keep, :]

        # Should now lose an edge.
        assert len(graph.edges) == num_edges - 1

        # Restore the full neighbors frame; the edge should come back
        graph.gpm._neighbors = full_neighbors.copy()
        assert len(graph.edges) == num_edges

        # Set weight
        graph.edge_weight_column = "weight"
        for edge in graph.edges:
            assert graph.edges[edge[0], edge[1]]["weight"] == 1

        # Remove weight
        graph.edge_weight_column = None
        for edge in graph.edges:
            with pytest.raises(KeyError):
                graph.edges[edge[0], edge[1]]["weight"]
コード例 #19
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test_column_getters(test_data):
    """
    Check that the genotype/phenotype/uncertainty column getters are None
    before a map is added and hold the expected column names afterwards.
    """

    first = test_data[0]
    gpm = gpmap.GenotypePhenotypeMap(genotype=first["genotype"],
                                     phenotype=first["phenotype"],
                                     uncertainty=first["phenotype"])

    model = models.linear.EpistasisLinearRegression()

    # Nothing is set on a fresh model
    assert model.genotype_column is None
    assert model.phenotype_column is None
    assert model.uncertainty_column is None

    model.add_gpm(gpm, uncertainty_column="uncertainty")

    # Only the uncertainty column was named explicitly in add_gpm
    observed = (model.genotype_column,
                model.phenotype_column,
                model.uncertainty_column)
    assert observed[0] == "genotype"
    assert observed[1] == "phenotype"
    assert observed[2] == "uncertainty"
コード例 #20
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test_abstractmodel_predict_to_excel(test_data, tmp_path):
    """
    Check that predict_to_excel writes a readable spreadsheet with one row
    per requested genotype.
    """

    first = test_data[0]
    genotypes = first["genotype"]

    model = models.linear.EpistasisLinearRegression()
    gpm = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                     phenotype=first["phenotype"])
    model.add_gpm(gpm)
    model.fit()

    excel_file = os.path.join(tmp_path, "tmp.xlsx")

    # Default call: every genotype in the map
    model.predict_to_excel(filename=excel_file)
    assert os.path.exists(excel_file)
    written = pd.read_excel(excel_file)
    assert len(written) == len(genotypes)

    # Make sure genotypes pass works (same file is written again)
    model.predict_to_excel(filename=excel_file, genotypes=genotypes[0])
    assert os.path.exists(excel_file)
    written = pd.read_excel(excel_file)
    assert len(written) == 1
コード例 #21
0
def test_add_remove_edge_labels(test_data):
    """
    Test both add_edge_labels and remove_edge_labels methods.
    """

    def stored_labels(graph):
        # Current "edge_labels" entry of the edge label options
        return graph.edge_label_options["edge_labels"]

    # Throw error because no gpmap
    no_map = gpmap.GenotypePhenotypeGraph()
    with pytest.raises(RuntimeError):
        no_map.add_edge_labels("test")

    for entry in test_data:

        # Make map and attach it to a fresh graph
        graph = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(entry["genotype"],
                                         phenotype=entry["phenotype"])
        graph.add_gpm(gpm)
        assert graph.gpm is gpm

        # Throw error data column is not real
        with pytest.raises(KeyError):
            graph.add_edge_labels(data_column="not_a_column")

        # This should work
        graph.add_edge_labels(data_column="weight")

        # This should not work: the format string is rejected
        with pytest.raises(ValueError):
            graph.add_edge_labels(data_column="weight", fmt="{:d}")

        # Default format is stored alongside the column name
        graph.add_edge_labels(data_column="weight")
        assert np.array_equal(("_gpm", "weight", "{:.3f}"),
                              stored_labels(graph))

        # Check remove_edge_labels
        graph.remove_edge_labels()
        with pytest.raises(KeyError):
            graph.edge_label_options["edge_labels"]

        # Make sure fmt pass works
        graph.add_edge_labels(data_column="weight", fmt="{}")
        assert np.array_equal(("_gpm", "weight", "{}"),
                              stored_labels(graph))
コード例 #22
0
def test_edge_weight_column_setter(test_data):
    """
    Check the edge_weight_column setter: it needs a map, rejects bad column
    names and, for a valid column, leaves a "weight" entry on every edge.
    """

    # One graph is reused for every data set; each add_gpm swaps the map
    graph = gpmap.GenotypePhenotypeGraph()

    # No map attached yet
    with pytest.raises(RuntimeError):
        graph.edge_weight_column = "weight"

    for entry in test_data:

        graph.add_gpm(gpmap.GenotypePhenotypeMap(entry["genotype"]))

        # Column that does not exist
        with pytest.raises(KeyError):
            graph.edge_weight_column = "stupid"

        # Column the setter rejects with TypeError
        with pytest.raises(TypeError):
            graph.edge_weight_column = "edge"

        # should work
        graph.edge_weight_column = "weight"
        for edge in graph.edges:
            print(edge)
            graph.edges[edge[0], edge[1]]["weight"]
コード例 #23
0
def test_get_model_matrix(test_data):
    """
    Check that get_model_matrix gives the same matrix with and without the
    cython implementation, for every data set, model type and order.
    """

    for entry in test_data:
        gpm = gpmap.GenotypePhenotypeMap(genotype=entry["genotype"])

        for model_type in ("global", "local"):
            for order in range(1, gpm.length + 1):

                # Get sites for this order
                sites = mapping.encoding_to_sites(order, gpm.encoding_table)

                # Cython result first, copied so it cannot be aliased
                cython_X = np.copy(m.get_model_matrix(gpm.binary,
                                                      sites,
                                                      model_type=model_type,
                                                      use_cython=True))

                # Then the pure python result
                python_X = m.get_model_matrix(gpm.binary,
                                              sites,
                                              model_type=model_type,
                                              use_cython=False)

                # Make sure python and cython give same answer
                assert np.array_equal(cython_X, python_X)
コード例 #24
0
def test_pyplot_plot(test_data):
    """
    Check gpmap.plot: accepted input types, returned objects, the plot_*
    switches, the four option dictionaries, figsize and ax handling.
    """

    d = test_data[0]

    def check_returned_types(G, fig, ax):
        # plot() should hand back (graph, figure, axes)
        assert isinstance(G, gpmap.GenotypePhenotypeGraph)
        assert isinstance(fig, matplotlib.figure.Figure)
        assert isinstance(ax, matplotlib.axes.Axes)

    # Feed in GenotypePhenotypeMap
    gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
    G, fig, ax = gpmap.plot(gpm)
    check_returned_types(G, fig, ax)
    plt.close()

    # Feed in GenotypePhenotypeGraph
    gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
    G_in = gpmap.GenotypePhenotypeGraph()
    G_in.add_gpm(gpm)

    # The graph passed in should be the graph handed back (the original
    # compared G with itself, which can never fail)
    G, fig, ax = gpmap.plot(G_in)
    assert G is G_in
    assert gpm is G.gpm

    check_returned_types(G, fig, ax)
    plt.close()

    # Feed in bad stuff
    bad_args = ["test", gpmap.GenotypePhenotypeGraph, (1, 23), 14]
    for b in bad_args:
        with pytest.raises(TypeError):
            gpmap.plot(b)

    # Feed in all combinations of plot modes: start with every switch on
    # and turn off each non-empty subset in turn.
    bool_args = [
        "plot_nodes", "plot_edges", "plot_node_labels", "plot_edge_labels"
    ]
    for i in range(len(bool_args)):
        for c in itertools.combinations(bool_args, i + 1):
            bool_kwargs = {k: k not in c for k in bool_args}

            gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
            G, fig, ax = gpmap.plot(gpm, **bool_kwargs)
            check_returned_types(G, fig, ax)
            plt.close()

    # Check passing node, edge, node label and edge label options. Each
    # case is (plot keyword / graph attribute, option key, option value).
    option_cases = [
        ("node_options", "node_size", 5),
        ("edge_options", "style", "--"),
        ("node_label_options", "font_size", 14),
        ("edge_label_options", "font_size", 14),
    ]
    for kwarg, key, value in option_cases:

        # A dict of options should be applied to the returned graph
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
        G, fig, ax = gpmap.plot(gpm, **{kwarg: {key: value}})
        plt.close()
        assert getattr(G, kwarg)[key] == value

        # Anything that is not a dict should be rejected
        with pytest.raises(TypeError):
            gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
            G, fig, ax = gpmap.plot(gpm, **{kwarg: "not_right_type"})

    # Test figsize setting
    gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
    G, fig, ax = gpmap.plot(gpm, figsize=(2, 2))
    assert np.array_equal(fig.get_size_inches(), (2, 2))
    plt.close()

    bad_fig_size = [(1, 2, 3), (1, ), "stupid", "RA", 5]
    for b in bad_fig_size:
        with pytest.raises(ValueError):
            G, fig, ax = gpmap.plot(gpm, figsize=b)

    # test ax pass: plotting onto an existing axes should return that axes
    G, fig, ax = gpmap.plot(gpm)
    G, fig, ax2 = gpmap.plot(gpm, ax=ax)
    assert ax is ax2
    with pytest.raises(TypeError):
        G, fig, ax = gpmap.plot(gpm, ax="stupid")

    # Close every figure still open so none leak into later tests
    plt.close("all")
コード例 #25
0
def generate_gpm(wildtype=None,
                 mutations=None,
                 site_labels=None,
                 num_sites=5,
                 num_states_per_site=2,
                 alphabet=None,
                 max_genotypes=131072):
    r"""
    Generate a GenotypePhenotypeMap with various genotypes.

    Parameters
    ----------
    wildtype : string
        wildtype sequence. if not specified, construct arbitrary wildtype from
        mutations list.

    mutations : list-like
        If specified, this overrides num_sites and num_states_per_site. This
        can take two forms:

        1) Just like a typical GenotypePhenotypeMap call, this could be a list
        of lists, with one list per site. The internal lists hold the mutation
        alphabet for each site. For example, mutations=[["A","B"],["A"]] would
        create a map with states "A" and "B" at the first site and state "A" at
        the second site. NOTE: if wildtype is specified, its states must be
        in the mutations list. A wildtype sequence "BA" would be compatible with
        the mutation list above, but "BB" would not because there is no "B" in
        the second position.

        2) This could be a list of ints, where the ints indicate how many states
        to give each site. Thus, mutations=[2,1] would create a map with two
        states at the first site and one state at the second site.

    site_labels : array-like
        list of labels to apply to sites.  If this is not specified, the
        first site is assigned a label 0, the next 1, etc.  If specified, sites
        are assigned labels in the order given.  For example, if the genotypes
        specify mutations at positions 12 and 75, this would be a list [12,75].
        This argument is passed to GenotypePhenotypeMap without being checked
        here.

    num_sites : int
        Number of sites to give each genotype. If mutations are given, this
        is ignored; if mutations are not given, this is required.

    num_states_per_site : int
        Number of states to assign each site in a genotype. If mutations are
        given, this is ignored; if mutations are not given, this is required. If
        num_states_per_site was 2 and num_sites was 5, this would generate a
        map with 5 sites in two possible state (2^5 genotypes). To specify
        different numbers of states at each site, use the mutations argument.

    alphabet : str
        letters to use for generating genotypes. This is used for map
        construction unless mutations is a list of lists containing states, in
        which case this argument is ignored. This can have two forms:

        1) key for a pre-defined alphabet: 'aa':'ACDEFGHIKLMNPQRSTVWY',
        'dna':'ACGT','rna':'ACGU', or 'number':'0123456789'
        2) A string of unique letters (for example, "ACGTU" or "1XyzP")

    max_genotypes : int
        do not create a GenotypePhenotypeMap that has more than max_genotypes
        genotypes. (This check is in place to avoid accidentally constructing
        a truly massive combinatorial map.) If map will be too big, throws a
        RuntimeError. To disable this check, set to None. Default is
        2^17 = 131072.

    Returns
    -------
    GenotypePhenotypeMap
        map constructed from the genotypes that
        gpmap.utils.mutations_to_genotypes builds from the per-site states,
        with the (given or generated) wildtype and site_labels.

    Raises
    ------
    ValueError
        if alphabet, wildtype, mutations, num_sites, num_states_per_site or
        max_genotypes cannot be interpreted, or are inconsistent with one
        another.
    RuntimeError
        if the requested map would hold more than max_genotypes genotypes.
    """

    # ------------------------------------------------------------------------
    # Deal with alphabet. This will only be used if we have to construct a
    # mutations list, but has to be done first so we can process mutations
    # argument properly.
    # ------------------------------------------------------------------------

    if alphabet is None:
        alphabet = list(ALPHABETS["aa"])

    else:

        err = None

        # See if it's a known alphabet
        try:
            alphabet = list(ALPHABETS[alphabet])

        # Not a known alphabet (KeyError) or not even hashable (TypeError).
        # Make sure we can turn it into a list of single-letter strings.
        except (TypeError, KeyError):

            try:
                alphabet = list(alphabet)
                for i, letter in enumerate(alphabet):
                    alphabet[i] = f"{letter}"
                    if len(alphabet[i]) > 1:
                        err = f"alphabet state {alphabet[i]} more than one letter\n"
                        break
            except (ValueError, TypeError):
                err = f"problem parsing alphabet {alphabet}\n"

        # Make sure the alphabet has unique letters.
        if err is None:
            if len(alphabet) != len(set(alphabet)):
                err = "alphabet contains non-unique letters\n"

        if err is not None:
            err += "\nalphabet can have two forms. 1) a string indicating a \n"
            err += "built in alphabet (aa, dna, rna, number); 2) a string of \n"
            err += "unique letters\n"
            raise ValueError(err)

    # ------------------------------------------------------------------------
    # Deal with wildtype entry. We need to make sure a generated mutations
    # list has wildtype entries or that a specified mutations entry has the
    # wildtype states.
    # ------------------------------------------------------------------------

    if wildtype is not None:

        try:
            wildtype = [f"{w}" for w in list(wildtype)]
            # An empty wildtype yields an empty set here, so it fails this
            # check too.
            if {len(w) for w in wildtype} != {1}:
                raise TypeError
            if len(wildtype) == 0:
                raise TypeError
        except (TypeError, ValueError):
            err = f"wildtype '{wildtype}' could not be interpreted as a list of\n"
            err += "single-character strings. len(wildtype) must be > 0.\n"
            raise ValueError(err)

    # ------------------------------------------------------------------------
    # If mutations is specified, do some sanity checking and/or construction.
    # ------------------------------------------------------------------------

    if mutations is not None:

        err = None

        # Make sure mutations is iterable
        try:
            new_num_sites = len(mutations)
            new_mutations = [None for _ in range(new_num_sites)]
        except TypeError:
            err = f"mutations '{mutations}' is not iterable.\n"

        # Check for length match with wildtype
        if err is None and wildtype is not None:
            if len(wildtype) != new_num_sites:
                err = "mutations does not match length of specified wildtype\n"

        if err is None and new_num_sites == 0:
            err = "mutations has no sites\n"

        # If we get here, so far so good...
        if err is None:

            # Now iterate over mutations.
            for i, site in enumerate(mutations):

                # Try to interpret mutations as a list of single-character
                # strings corresponding to states at the site.
                try:
                    states_at_site = [f"{s}" for s in site]
                    if len(set(states_at_site)) != len(states_at_site):
                        err = f"site '{site}' has non-unique mutations\n"
                        break
                    if {len(s) for s in states_at_site} != {1}:
                        err = f"not all states at site '{site}' can be turned\n"
                        err += "into single-character strings\n"
                        break

                    # Make sure wildtype state is in the states at this site.
                    if wildtype is not None:
                        if not set(wildtype[i]).issubset(set(states_at_site)):
                            err = f"wildtype state {wildtype[i]} not in mutations\n"
                            err += f"({states_at_site})\n"
                            break

                    # If we get here, mutations at site i passed quality control
                    new_mutations[i] = states_at_site[:]

                # If we can't interpret site as a list of states -- maybe it's
                # an integer specifying the number of states at the site?
                except (TypeError, ValueError):

                    # Try to coerce site into an integer.
                    try:
                        new_num_states = int(site)

                        # Is it too big of integer to specify states given
                        # alphabet?
                        if new_num_states > len(alphabet):
                            err = f"site '{site}' requests more states than are \n"
                            err += f"in the specified alphabet '{alphabet}'\n"
                            break

                        # Is it less than one?
                        if new_num_states < 1:
                            err = f"site '{site}' not > 0\n"
                            break

                    # Nope, not an integer. Die.
                    except (ValueError, TypeError):
                        err = f"could not interpret site '{site}' in mutations vector\n"
                        break

                    # Generate states for this site, making sure the wildtype
                    # state is included if wildtype was specified.
                    if wildtype is None:
                        site_states = []
                        local_alphabet = alphabet[:]
                    else:
                        # The wildtype state takes one of the requested slots
                        # and must not be drawn a second time.
                        site_states = [wildtype[i]]
                        new_num_states = new_num_states - 1
                        local_alphabet = [
                            a for a in alphabet if a != wildtype[i]
                        ]

                    # If we need to add more than the wildtype state, choose
                    # randomly from the alphabet
                    if new_num_states > 0:
                        site_states.extend(
                            _sample_alphabet(local_alphabet, new_num_states))

                    new_mutations[i] = site_states

        if err is not None:
            err += "\nmutations should be a list indicating the mutations to \n"
            err += "allow at each site. It can have two forms: a list of lists\n"
            err += "or a list of ints. If a list of lists, the length of the \n"
            err += "outer list determines the number of sites, while the inner\n"
            err += "lists indicate the states possible at each site. These \n"
            err += "states must be unique within the site and be able to be \n"
            err += "coerced into single-letter characters. If a list of ints,\n"
            err += "the list length determines the number of sites and the\n"
            err += "int indicates the number of states at that site. The\n"
            err += "states are selected randomly from the chosen alphabet.\n"

            raise ValueError(err)

        # new_mutations holds freshly built lists only, so the caller's
        # mutations argument is never modified.
        num_sites = new_num_sites
        mutations = new_mutations

    # ------------------------------------------------------------------------
    # If no mutations are specified, build it from num_sites and
    # num_states_per_site. Pull in wildtype states if that was specified.
    # ------------------------------------------------------------------------
    if mutations is None:

        # Make sure number of sites is sane. int() raises ValueError for
        # strings like "abc" and TypeError for things like None; report both
        # with the same message.
        try:
            num_sites = int(num_sites)
            if num_sites < 1:
                raise TypeError
        except (TypeError, ValueError):
            err = "num_sites should be an integer > 0\n"
            raise ValueError(err)

        # Make sure wildtype and num_sites have the same length
        if wildtype is not None:
            if len(wildtype) != num_sites:
                err = f"wildtype '{wildtype}' is not the same length as \n"
                err += f"num_sites '{num_sites}'\n"
                raise ValueError(err)

        # Make sure the number of states per site is sane
        try:
            num_states_per_site = int(num_states_per_site)
            if num_states_per_site < 1:
                raise TypeError
            if num_states_per_site > len(alphabet):
                raise TypeError
        except (TypeError, ValueError):
            err = "num_states_per_site should be an integer > 0 and\n"
            err += f"<= the alphabet size. Current alphabet: {alphabet}\n"
            raise ValueError(err)

        mutations = []
        for i in range(num_sites):

            # Make sure the site has the wildtype state if specified
            if wildtype is None:
                mutations.append([])
                local_alphabet = alphabet[:]
                num_states = num_states_per_site
            else:
                mutations.append([wildtype[i]])
                local_alphabet = [a for a in alphabet if a != wildtype[i]]
                num_states = num_states_per_site - 1

            # If we need to add more than the wildtype state, choose
            # randomly from the alphabet
            if num_states > 0:
                mutations[i].extend(
                    _sample_alphabet(local_alphabet, num_states))

    # ------------------------------------------------------------------------
    # Wildtype, again
    # ------------------------------------------------------------------------

    # If we still don't have wildtype, create it from the mutations list
    if wildtype is None:
        wildtype = [m[0] for m in mutations]

    wildtype = "".join(wildtype)

    # Note, for site labels, let GenotypePhenotypeMap object check sanity.

    # ------------------------------------------------------------------------
    # Do last sanity check on size. Avoid a massive combinatorial explosion...
    # ------------------------------------------------------------------------

    # Multiply with Python ints: they never overflow, so a huge map cannot
    # wrap around to a small number and sneak past the check below.
    map_size = 1
    for site_states in mutations:
        map_size *= len(site_states)

    if max_genotypes is None:
        max_genotypes = np.inf
    else:
        try:
            max_genotypes = int(max_genotypes)
        except (TypeError, ValueError):
            err = "max_genotypes should be an integer or None\n"
            raise ValueError(err)

    if map_size > max_genotypes:
        err = "This command will generate a genotype phenotype map with more\n"
        err += f"{map_size} genotypes. This is larger than max_genotypes\n"
        err += f"({max_genotypes}). To generate this map, either choose\n"
        err += "parameters that will lead to a smaller map or increase\n"
        err += "max_genotypes. To disable this check, set max_genotypes to None.\n"
        raise RuntimeError(err)

    # Generate vector of genotypes from mutations list
    genotype = gpmap.utils.mutations_to_genotypes(mutations)

    # Generate and return the GenotypePhenotypeMap.
    return gpmap.GenotypePhenotypeMap(genotype,
                                      wildtype=wildtype,
                                      site_labels=site_labels)
コード例 #26
0
ファイル: test_models_base.py プロジェクト: harmsm/epistasis
def test_abstractmodel_add_gpm(test_data):
    """
    Check add_gpm on an EpistasisLinearRegression model: rejection of
    non-map inputs, handling of the genotype_column, phenotype_column and
    uncertainty_column arguments, and the model state after a good call.
    """

    dataset = test_data[0]
    genotypes = dataset["genotype"]
    phenotypes = dataset["phenotype"]

    # --- gpm argument itself ------------------------------------------------
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        phenotype=phenotypes)
    model = models.linear.EpistasisLinearRegression()

    for not_a_map in (1, None, "test", [], {}):
        with pytest.raises(TypeError):
            model.add_gpm(not_a_map)
    model.add_gpm(gp_map)

    # --- genotype_column ----------------------------------------------------
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        phenotype=phenotypes)
    model = models.linear.EpistasisLinearRegression()

    for bad_column in (1, None, [], {}, (1, )):
        with pytest.raises(TypeError):
            print(f"trying {bad_column}")
            model.add_gpm(gp_map, genotype_column=bad_column)

    with pytest.raises(KeyError):
        model.add_gpm(gp_map, genotype_column="not_a_column")

    model.add_gpm(gp_map, genotype_column="genotype")
    assert model.genotype_column == "genotype"

    # --- phenotype_column ---------------------------------------------------
    model = models.linear.EpistasisLinearRegression()

    # A map holding only genotypes has no float column to pick up
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes)
    with pytest.raises(ValueError):
        model.add_gpm(gp_map)

    # Requested column does not exist
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        phenotype=phenotypes)
    with pytest.raises(KeyError):
        model.add_gpm(gp_map, phenotype_column="not_real")

    # Requested column exists but holds genotype strings, not numbers
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        phenotype=genotypes)
    with pytest.raises(ValueError):
        model.add_gpm(gp_map, phenotype_column="phenotype")

    # With no explicit choice, the first non-reserved float column wins
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        coolness=phenotypes,
                                        something_else=phenotypes)
    model.add_gpm(gp_map)
    assert model.phenotype_column == "coolness"

    # --- uncertainty_column -------------------------------------------------

    # Default (None): a constant column is generated
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        phenotype=phenotypes)
    model.add_gpm(gp_map)
    assert model.uncertainty_column == "epi_zero_uncertainty"
    generated = np.array(model.gpm.data.loc[:, "epi_zero_uncertainty"])
    assert len(np.unique(generated)) == 1
    smallest_pheno = np.min(gp_map.data.loc[:, model.phenotype_column])
    assert np.isclose(generated[0], smallest_pheno * 1e-6)

    # Map with a spare float column and a non-float column
    gp_map = gpmap.GenotypePhenotypeMap(genotype=genotypes,
                                        phenotype=phenotypes,
                                        coolness=phenotypes,
                                        not_float=genotypes)

    # Uncertainty may not be the phenotype column
    with pytest.raises(ValueError):
        model.add_gpm(gp_map, uncertainty_column="phenotype")

    # Uncertainty column must exist
    with pytest.raises(KeyError):
        model.add_gpm(gp_map, uncertainty_column="not_there")

    # Uncertainty column must be numeric
    with pytest.raises(ValueError):
        model.add_gpm(gp_map, uncertainty_column="not_float")

    # A separate float column is accepted
    model.add_gpm(gp_map, uncertainty_column="coolness")
    assert model.uncertainty_column == "coolness"

    # --- final model state --------------------------------------------------
    assert model.gpm is gp_map
    assert model.Xcolumns is not None
    assert model.epistasis is not None
    assert model._previous_X is None
コード例 #27
0
def test_add_remove_edge_sizemap(test_data):
    """
    Exercise GenotypePhenotypeGraph.add_edge_sizemap and
    remove_edge_sizemap: argument validation, default value ranges, and
    explicit vmin/vmax/size_min/size_max values.
    """

    # A graph with no map attached cannot take a sizemap
    graph = gpmap.GenotypePhenotypeGraph()
    with pytest.raises(RuntimeError):
        graph.add_edge_sizemap("test")

    for dataset in test_data:

        graph = gpmap.GenotypePhenotypeGraph()
        gp_map = gpmap.GenotypePhenotypeMap(dataset["genotype"])
        graph.add_gpm(gp_map)
        assert graph.gpm is gp_map

        # Unknown data column
        with pytest.raises(KeyError):
            graph.add_edge_sizemap(data_column="not_a_column")

        # Each of these keyword combinations is invalid: vmin > vmax,
        # negative size, and size_max < size_min (in that order).
        invalid_kwargs = [
            {"vmin": 1000, "vmax": 10},
            {"size_min": -2},
            {"size_max": 2, "size_min": 10},
        ]
        for kwargs in invalid_kwargs:
            with pytest.raises(ValueError):
                graph.add_edge_sizemap(data_column="weight", **kwargs)

        # Valid call on a freshly added random column
        n_edges = len(gp_map.neighbors)
        graph.gpm.neighbors.loc[:, "test"] = np.random.random(n_edges)
        graph.add_edge_sizemap(data_column="test")

        # Defaults: value range taken from the data, sizes 0.1 to 20
        column = graph.gpm.neighbors.loc[:, "test"]
        lowest = np.min(column)
        highest = np.max(graph.gpm.neighbors.loc[:, "test"])
        assert np.array_equal(("_gpm", "test", lowest, highest, 0.1, 20),
                              graph.edge_options["width"])

        # Removal restores the default width
        graph.remove_edge_sizemap()
        assert graph.edge_options["width"] == graph._default_edge_width

        # Explicit vmin, vmax, size_min and size_max must be stored as given,
        # whether the data are constant or span negative values.
        constant_values = np.ones(len(gp_map.neighbors))
        centered_values = np.random.random(len(gp_map.neighbors)) - 0.5

        for edge_values in (constant_values, centered_values):
            graph = gpmap.GenotypePhenotypeGraph()
            gp_map = gpmap.GenotypePhenotypeMap(dataset["genotype"])
            graph.add_gpm(gp_map)

            graph.gpm.neighbors.loc[:, "test"] = edge_values

            low = np.random.random()
            high = low + 10
            smallest = np.random.random()
            largest = smallest + 10
            graph.add_edge_sizemap(data_column="test",
                                   vmin=low,
                                   vmax=high,
                                   size_min=smallest,
                                   size_max=largest)

            stored = graph.edge_options["width"]
            assert np.array_equal(
                ("_gpm", "test", low, high, smallest, largest), stored)
コード例 #28
0
def test_add_remove_edge_cmap(test_data):
    """
    Test both add_edge_cmap and remove_edge_cmap methods.

    Covers: failure when no map is attached, rejection of an unknown data
    column or colormap name, colormaps given by name or as an object,
    default and explicit vmin/vmax, and the state left by
    remove_edge_cmap.
    """

    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9. pyplot.get_cmap gives the same lookup-by-name on old and new
    # releases. Imported here so the test does not rely on pyplot already
    # being imported at module level.
    from matplotlib import pyplot as plt

    plasma_type = type(plt.get_cmap("plasma"))

    def _check_edge_color(graph, column, vmin, vmax):
        # edge_color is expected to be the tuple
        # ("_gpm", column, colormap, vmin, vmax).
        edge_color = copy.deepcopy(graph.edge_options)["edge_color"]
        assert type(edge_color) is tuple
        assert edge_color[0] == "_gpm"
        assert edge_color[1] == column
        assert type(edge_color[2]) is plasma_type
        assert edge_color[3] == vmin
        assert edge_color[4] == vmax

    # No map attached yet
    G = gpmap.GenotypePhenotypeGraph()
    with pytest.raises(RuntimeError):
        G.add_edge_cmap("test")

    for d in test_data:

        # Test basic construction/error checking
        G = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"],
                                         phenotype=d["phenotype"])
        G.add_gpm(gpm)

        with pytest.raises(KeyError):
            G.add_edge_cmap(data_column="not_a_column")

        with pytest.raises(ValueError):
            G.add_edge_cmap(data_column="weight", cmap="not_a_cmap")

        G.add_edge_cmap(data_column="weight", cmap="plasma")

        # Now pass in cmap as a colormap object
        G = gpmap.GenotypePhenotypeGraph()
        G.add_gpm(gpm)
        cmap = plt.get_cmap("plasma")
        G.add_edge_cmap(data_column="weight", cmap=cmap)

        # The expected limits are both 1 (min and max of [1]); the upper
        # limit is checked against the max rather than repeating the min.
        _check_edge_color(G, "weight", np.min([1]), np.max([1]))

        # Now pass in more interesting edge values
        G = gpmap.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"],
                                         phenotype=d["phenotype"])
        gpm.get_neighbors()
        gpm.neighbors.loc[:, "flux"] = np.random.random(len(gpm.neighbors))
        G.add_gpm(gpm)

        # Default limits come from the data column
        G.add_edge_cmap(data_column="flux", cmap="plasma")
        _check_edge_color(G,
                          "flux",
                          np.min(gpm.neighbors.loc[:, "flux"]),
                          np.max(gpm.neighbors.loc[:, "flux"]))

        # Explicit limits are stored as given
        G.add_edge_cmap(data_column="flux", cmap=cmap, vmin=5, vmax=10)
        _check_edge_color(G, "flux", 5, 10)

        # Now test removal: the cmap-specific keys must be absent and the
        # edge color back to plain black.
        G.remove_edge_cmap()
        removed_options = ["edge_vmin", "edge_vmax", "edge_cmap"]
        for r in removed_options:
            with pytest.raises(KeyError):
                G.edge_options[r]
        assert G.edge_options["edge_color"] == "black"
コード例 #29
0
def test_add_gpm(test_data):
    """
    Test GenotypePhenotypeGraph.add_gpm: type checking of the gpm argument,
    handling of edge_weight_column with and without pre-built neighbors,
    the return value, and the nodes/edges loaded from the attached map.
    """

    # Check for bad value checking
    G = base.GenotypePhenotypeGraph()
    bad_values = ["test",[],1.3,(1,2),base.GenotypePhenotypeGraph()]
    for b in bad_values:
        with pytest.raises(TypeError):
            G.add_gpm(b)

    for d in test_data:

        G = base.GenotypePhenotypeGraph()
        assert G.gpm is None

        # Build with bad edge_weight_column, no neighbors
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
        with pytest.raises(ValueError):
            G.add_gpm(gpm,edge_weight_column="not_yet")

        # Build with bad edge_weight_column, pre-built neighbors. The
        # neighbors table has a "now_here" column but not the requested
        # "not_yet" column.
        G = base.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
        gpm.get_neighbors()
        gpm.neighbors.loc[:,"now_here"] = np.ones(len(gpm.neighbors))
        with pytest.raises(ValueError):
            G.add_gpm(gpm,edge_weight_column="not_yet")

        # Add with good edge_weight_column, pre-built neighbors
        G = base.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
        gpm.get_neighbors()
        gpm.neighbors.loc[:,"now_here"] = np.ones(len(gpm.neighbors))
        ret = G.add_gpm(gpm,edge_weight_column="now_here")
        assert ret.edge_weight_column == "now_here"

        # No prebuilt neighbors
        G = base.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
        ret = G.add_gpm(gpm)

        # Test return of self
        assert G is ret

        # Make sure neighbors and neighbor weight constructed correctly
        assert G.edge_weight_column == "weight"
        assert G.gpm.neighbors is not None
        # Bare attribute access with no assert: presumably raises if the
        # neighbors table has no "weight" column -- TODO confirm.
        G.gpm.neighbors.weight

        # Make sure gpm is now attached (as pointer, not copy)
        assert G.gpm is gpm

        # Make sure data loaded into nodes as expected: node i must carry the
        # same values as row i of the map's data for each listed key.
        assert len(G.nodes) == len(G.gpm.data)
        keys = ["genotype","binary","n_mutations","name"]
        for i in range(len(G.nodes)):
            for k in keys:
                assert G.nodes[i][k] == G.gpm.data.iloc[i][k]

        # Make sure neighbors were generated. (Do not check that the neighbors
        # are right. For this, check the test_gpm.py tests.)
        assert G.gpm.neighbors is not None

        # Make sure edges match neighbors
        assert len(G.gpm.neighbors) == len(G.edges)
        for i in range(len(G.gpm.neighbors)):
            edge = G.gpm.neighbors.edge[i]
            # Bare lookup with no assert: presumably raises if the edge is
            # missing from the graph -- TODO confirm.
            G.edges[edge]

        # Build neighbors differently -- make sure this still worked without
        # generating neighbors on the fly.
        G = base.GenotypePhenotypeGraph()
        gpm = gpmap.GenotypePhenotypeMap(d["genotype"])
        gpm.get_neighbors("hamming",cutoff=3)
        # Copy the edges before add_gpm so the comparison below cannot be
        # affected by anything add_gpm does to the neighbors table.
        pregen_edges = np.copy(gpm.neighbors.edge)
        G.add_gpm(gpm)
        assert len(pregen_edges) == len(G.gpm.neighbors.edge)
        for i in range(len(G.gpm.neighbors)):
            edge = G.gpm.neighbors.edge[i]
            G.edges[edge]