def gpm(test_data):
    """
    Build a genotype-phenotype map from the first entry of ``test_data``.

    Only the "genotype", "phenotype" and "wildtype" keys of that entry are
    read; each is forwarded as the keyword argument of the same name.
    """
    first = test_data[0]
    fields = ("genotype", "phenotype", "wildtype")
    return GenotypePhenotypeMap(**{key: first[key] for key in fields})
def gpm():
    """Return a hard-coded eight-genotype map with wildtype "000"."""
    # Insertion order of the dict fixes the genotype/phenotype pairing.
    phenotype_by_genotype = {
        "000": 0.1,
        "001": 0.1,
        "010": 0.5,
        "100": 0.4,
        "011": 0.2,
        "101": 0.8,
        "110": 0.5,
        "111": 1.0,
    }
    return GenotypePhenotypeMap(
        "000",
        list(phenotype_by_genotype),
        list(phenotype_by_genotype.values()),
        stdeviations=0.1,
    )
def fit_transform(self, X=None, y=None, **kwargs):
    """
    Fit the model, then return a new map restricted to predicted class 1.

    ``X``, ``y`` and any extra keyword arguments are passed to ``self.fit``;
    ``self.predict`` is then called with the same ``X``. Rows of
    ``self.gpm.data`` whose prediction equals 1 are used to build the
    returned GenotypePhenotypeMap, keeping the wildtype and mutations of
    the attached map.
    """
    self.fit(X=X, y=y, **kwargs)
    predicted = self.predict(X=X)

    # Boolean mask selecting the rows predicted as class 1.
    keep = predicted == 1
    return GenotypePhenotypeMap.read_dataframe(
        dataframe=self.gpm.data[keep],
        wildtype=self.gpm.wildtype,
        mutations=self.gpm.mutations,
    )
def test_json(test_data, tmp_path):
    """
    Round-trip every test map through a json file and compare the result.
    """
    # The same file is overwritten for every dataset.
    json_file = os.path.join(tmp_path, "tmp.json")

    for d in test_data:
        original = GenotypePhenotypeMap(
            wildtype=d["wildtype"],
            genotype=d["genotype"],
            phenotype=d["phenotype"],
            uncertainty=d["uncertainty"],
        )

        # Writing must produce a file on disk ...
        original.to_json(filename=json_file)
        assert os.path.isfile(json_file)

        # ... and reading it back must reproduce the map.
        restored = gpmap.read_json(filename=json_file)
        conftest.compare_gpmap(original, restored)
def test_csv(test_data, tmp_path):
    """
    Write each test map to csv and check that it reads back unchanged.
    """
    # Both write passes below target the same path.
    out_file = os.path.join(tmp_path, "tmp.csv")

    for d in test_data:
        wildtype = d["wildtype"]

        gpm = GenotypePhenotypeMap(
            genotype=d["genotype"],
            wildtype=wildtype,
            phenotype=d["phenotype"],
            uncertainty=d["uncertainty"],
        )

        # Write the csv and confirm it exists.
        gpm.to_csv(out_file, index=False)
        assert os.path.exists(out_file)

        # Round trip with an explicit wildtype.
        gpm_read = gpmap.read_csv(out_file, wildtype=wildtype)
        conftest.compare_gpmap(gpm, gpm_read)

        # Round trip without a wildtype; the reader is expected to infer it.
        gpm_read = gpmap.read_csv(out_file)
        conftest.compare_gpmap(gpm, gpm_read)

        # Custom site labels ("10", "11", ...) should survive the round trip.
        site_labels = [str(x) for x in range(10, 10 + len(wildtype))]
        gpm = GenotypePhenotypeMap(
            genotype=d["genotype"],
            wildtype=wildtype,
            site_labels=site_labels,
        )
        gpm.to_csv(out_file)
        gpm_read = gpmap.read_csv(out_file)

        for i, label in enumerate(gpm_read.site_labels):
            # Invariant sites get virtual site labels; only compare sites
            # that actually vary.
            if len(d["mutations"][i]) != 1:
                assert label == gpm.site_labels[i]

        # A bad wildtype should warn and fall back to sequential site labels.
        with pytest.warns(UserWarning):
            gpm_read = gpmap.read_csv(out_file, wildtype=d["mutant"])
        assert np.array_equal(gpm_read.site_labels, range(len(wildtype)))
def test_add_missing_genotypes(test_data):
    """
    Check that add_missing_genotypes fills a sparse binary map to 2^L rows.
    """
    L = 8
    num_possible = 2**L

    # Draw 100 random binary genotypes of length L and keep the unique ones.
    unique_genotypes = {
        "".join([random.choice(["0", "1"]) for _ in range(L)])
        for _ in range(100)
    }
    genotype = list(unique_genotypes)
    num_now = len(genotype)
    num_new = num_possible - num_now

    # The map should look as expected before anything is added.
    gpm = GenotypePhenotypeMap(genotype=genotype,
                               phenotype=np.ones(num_now))
    assert gpm.num_genotypes == num_now
    assert gpm.length == L

    # A memory-check cutoff of zero must reject the request.
    with pytest.raises(ValueError):
        gpm.add_missing_genotypes(mem_check_cutoff=0)

    # Fill in the missing genotypes and check the total.
    gpm.add_missing_genotypes()
    assert gpm.num_genotypes == num_possible

    # New rows are flagged as inferred, have n_mutations set, and carry
    # NaN phenotypes.
    frame = gpm._data
    assert np.sum(frame.inferred) == num_new
    assert np.sum(np.isnan(frame.n_mutations)) == 0
    assert np.sum(np.isnan(frame.phenotype)) == num_new
def test_excel(test_data, tmp_path):
    """
    Write each test map to an excel file and check that it reads back
    unchanged.
    """
    # Both write passes below target the same path.
    xlsx_path = os.path.join(tmp_path, "tmp.xlsx")

    for d in test_data:
        wildtype = d["wildtype"]

        gpm = GenotypePhenotypeMap(
            genotype=d["genotype"],
            wildtype=wildtype,
            phenotype=d["phenotype"],
            uncertainty=d["uncertainty"],
        )

        # Write the workbook and confirm it exists.
        gpm.to_excel(filename=xlsx_path)
        assert os.path.isfile(xlsx_path)

        # Round trip with an explicit wildtype.
        new_gpm = gpmap.read_excel(filename=xlsx_path, wildtype=wildtype)
        conftest.compare_gpmap(gpm, new_gpm)

        # Round trip without a wildtype; the reader is expected to infer it.
        gpm_read = gpmap.read_excel(filename=xlsx_path)
        conftest.compare_gpmap(gpm, gpm_read)

        # Custom site labels ("10", "11", ...) should survive the round trip.
        site_labels = [str(x) for x in range(10, 10 + len(wildtype))]
        gpm = GenotypePhenotypeMap(
            genotype=d["genotype"],
            wildtype=wildtype,
            site_labels=site_labels,
        )
        gpm.to_excel(xlsx_path)
        gpm_read = gpmap.read_excel(xlsx_path)

        for i, label in enumerate(gpm_read.site_labels):
            # Invariant sites get virtual site labels; only compare sites
            # that actually vary.
            if len(d["mutations"][i]) != 1:
                assert label == gpm.site_labels[i]

        # A bad wildtype should warn and fall back to sequential site labels.
        with pytest.warns(UserWarning):
            gpm_read = gpmap.read_excel(xlsx_path, wildtype=d["mutant"])
        assert np.array_equal(gpm_read.site_labels, range(len(wildtype)))
def test_pos(test_data):
    """
    Exercise gpgraph.pyplot.pos.flattened: argument checks and return shape.
    """
    layout = gpgraph.pyplot.pos.flattened

    # The graph argument is required.
    with pytest.raises(TypeError):
        layout()

    # Something that is not a graph is rejected.
    with pytest.raises(ValueError):
        layout("stupid")

    for d in test_data:
        gpm = GenotypePhenotypeMap(genotypes=d["genotypes"],
                                   wildtype=d["wildtype"],
                                   phenotypes=d["phenotypes"])
        G = GenotypePhenotypeGraph()

        # A graph with no map loaded is rejected.
        with pytest.raises(ValueError):
            layout(G)

        # After loading data the call should succeed.
        G.add_gpm(gpm)
        ret = layout(G)

        # Basic sanity: a dict keyed by graph nodes, each value a 2-item list.
        assert type(ret) is dict
        for node, coord in ret.items():
            G.nodes[node]  # raises if the key is not a node of G
            assert type(coord) is list
            assert len(coord) == 2

        # Non-positive scale values are rejected.
        for bad_scale in (0, -1):
            with pytest.raises(ValueError):
                layout(G, scale=bad_scale)

        # Positive scale values are accepted.
        for good_scale in (10.0, 22.3):
            ret = layout(G, scale=good_scale)
            assert type(ret) is dict

        # Only checks that the vertical parameter is accepted, not its effect.
        ret = layout(G, scale=1, vertical=True)
        assert type(ret) is dict
def test_dict(test_data):
    """
    Test conversion of a gpmap to and from a dict.
    """
    one_genotype = {"genotype": ["0"]}

    # A trivially small map should load.
    gpmap.read_dict({"wildtype": "0", "data": one_genotype})

    # A missing wildtype is rejected.
    with pytest.raises(ValueError):
        gpmap.read_dict({"data": {"genotype": ["0"]}})

    # A wildtype whose length differs from the genotypes is rejected.
    with pytest.raises(ValueError):
        gpmap.read_dict({"wildtype": "01", "data": {"genotype": ["0"]}})

    for d in test_data:
        gpm = GenotypePhenotypeMap(
            wildtype=d["wildtype"],
            genotype=d["genotype"],
            phenotype=d["phenotype"],
            uncertainty=d["uncertainty"],
        )

        # Dump the map to a dictionary.
        as_dict = gpm.to_dict()

        # Wildtype and mutations metadata must match the inputs.
        assert as_dict["wildtype"] == d["wildtype"]
        for i, site_mutations in enumerate(as_dict["mutations"]):
            assert np.array_equal(site_mutations, d["mutations"][i])

        # The data entry comes from a pandas conversion; only check its type.
        assert type(as_dict["data"]) is dict

        # Reading the dictionary back in must reproduce the map.
        restored = gpmap.read_dict(as_dict)
        conftest.compare_gpmap(gpm, restored)
def fit_transform(self, X=None, y=None, **kwargs):
    """
    Fit the model, then return a copy of the map with transformed phenotypes.

    ``X``, ``y`` and any extra keyword arguments are passed to ``self.fit``;
    ``self.transform`` is then called with the same ``X`` and ``y``. A new
    GenotypePhenotypeMap is built from ``self.gpm`` and the transformed
    values are stored in its 'phenotypes' data column.
    """
    self.fit(X=X, y=y, **kwargs)
    transformed = self.transform(X=X, y=y)

    # Rebuild the map from the attached one, then overwrite its phenotypes.
    new_map = GenotypePhenotypeMap.read_dataframe(
        dataframe=self.gpm.data,
        wildtype=self.gpm.wildtype,
        mutations=self.gpm.mutations,
    )
    new_map.data['phenotypes'] = transformed
    return new_map
def split_gpm(gpm, idx=None, nobs=None, fraction=None):
    """Split a GenotypePhenotypeMap into a training map and a test map.

    The map's ``data`` is divided by ``split_data``; each part is turned
    back into a GenotypePhenotypeMap that shares the wildtype and mutations
    of the input map.

    Parameters
    ----------
    gpm : GenotypePhenotypeMap
        full map to split.
    idx : list
        List of indices to include in training set
    nobs : int
        number of observations in training.
    fraction : float
        fraction in training set.

    Returns
    -------
    train_gpm : GenotypePhenotypeMap
        training set.
    test_gpm : GenotypePhenotypeMap
        test set.
    """
    parts = split_data(gpm.data, idx=idx, nobs=nobs, fraction=fraction)
    train, test = parts

    def rebuild(frame):
        # Wrap one half of the split using the parent map's metadata.
        return GenotypePhenotypeMap.read_dataframe(
            frame, wildtype=gpm.wildtype, mutations=gpm.mutations
        )

    train_gpm = rebuild(train)
    test_gpm = rebuild(test)
    return train_gpm, test_gpm
def test_dataframe(test_data, tmp_path):
    """
    Test building a map from a pandas dataframe.
    """
    for d in test_data:
        wildtype = d["wildtype"]

        # Standard map: genotype, phenotype and uncertainty columns.
        gpm = GenotypePhenotypeMap(
            genotype=d["genotype"],
            wildtype=wildtype,
            phenotype=d["phenotype"],
            uncertainty=d["uncertainty"],
        )
        columns = ("genotype", "phenotype", "uncertainty")
        df = pd.DataFrame({name: d[name] for name in columns})
        conftest.compare_gpmap(gpm, gpmap.read_dataframe(df, wildtype=wildtype))

        # Minimal map: genotype column only.
        gpm = GenotypePhenotypeMap(wildtype=wildtype, genotype=d["genotype"])
        df = pd.DataFrame({"genotype": d["genotype"]})
        conftest.compare_gpmap(gpm, gpmap.read_dataframe(df, wildtype=wildtype))

        # Omitting the wildtype should still work.
        conftest.compare_gpmap(gpm, gpmap.read_dataframe(df))

        # A frame without a genotype column must be rejected.
        df = pd.DataFrame({"phenotype": d["phenotype"]})
        with pytest.raises(ValueError):
            gpm_from_df = gpmap.read_dataframe(df, wildtype=wildtype)
def test_draw_node_labels(test_data):
    """
    Smoke-test draw_node_labels on a graph built from each test dataset.

    Only checks that the call runs, that the axis object is still a usable
    matplotlib axis, and that the returned labels have a length.
    """
    for d in test_data:
        gpm = GenotypePhenotypeMap(genotypes=d["genotypes"],
                                   phenotypes=d["phenotypes"])
        G = gpgraph.GenotypePhenotypeGraph()
        G.add_gpm(gpm)

        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            # Make sure it draws something.
            node_labels = draw_node_labels(G, flattened(G), ax)
            ax.plot  # <- will throw attribute error if this is not right
            len(node_labels)
        finally:
            # Close the figure even on failure; otherwise one 10x10 figure
            # per dataset stays open for the rest of the test session.
            plt.close(fig)
def test_genotype_is_in():
    """
    Check genotype_is_in for a bad input, a single genotype, and a list.
    """
    all_genotypes = ["SG", "PF", "SF", "PG"]
    gpm = GenotypePhenotypeMap(genotype=["SG", "PF", "SF", "PG"],
                               wildtype="SG")

    # None is not an acceptable query.
    with pytest.raises(TypeError):
        gpm.genotype_is_in(None)

    # A single known genotype returns the literal True.
    assert gpm.genotype_is_in("SG") is True

    # A list query returns one truthy entry per known genotype.
    found = gpm.genotype_is_in(all_genotypes)
    assert len(found) == 4
    assert sum(found) == 4
def test__nx_wrapper(test_data):
    """
    Check that _nx_wrapper warns, rather than fails, on an unknown kwarg.
    """
    gpm = GenotypePhenotypeMap(genotypes=["AA", "AB", "BA", "BB"])
    G = gpgraph.GenotypePhenotypeGraph()
    G.add_gpm(gpm)
    pos = flattened(G)

    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        # An argument the wrapped networkx function does not accept should
        # produce a UserWarning instead of an exception.
        with pytest.warns(UserWarning):
            _nx_wrapper(nx.draw_networkx_nodes,
                        G=G,
                        pos=pos,
                        ax=ax,
                        not_a_real_argument=10)
    finally:
        # Close the figure even on failure so it does not stay open for the
        # rest of the test session.
        plt.close(fig)
def gpmap_from_gpmap(
    original_gpmap,
    new_genotypes,
    new_phenotypes,
    new_n_replicates=1,
    new_stdeviations=None,
):
    """Build a GenotypePhenotypeMap that reuses another map's wildtype and
    mutations but carries new genotypes, phenotypes, n_replicates, and
    stdeviations.
    """
    # Metadata inherited from the original map.
    inherited = {
        "wildtype": original_gpmap.wildtype,
        "mutations": original_gpmap.mutations,
    }
    # Data supplied by the caller.
    replaced = {
        "genotypes": new_genotypes,
        "phenotypes": new_phenotypes,
        "stdeviations": new_stdeviations,
        "n_replicates": new_n_replicates,
    }
    return GenotypePhenotypeMap(**inherited, **replaced)
def gpvolve_base():
    """
    Build a GenotypePhenotypeMSM on a three-site, two-state (A/B) map.

    Phenotypes are drawn with ``np.random.rand`` and therefore differ between
    calls unless the caller seeds numpy's global random state.
    """
    wildtype = "AAA"
    genotypes = ["AAA", "AAB", "ABA", "BAA", "ABB", "BAB", "BBA", "BBB"]
    mutations = {
        0: ["A", "B"],
        1: ["A", "B"],
        2: ["A", "B"],
    }
    phenotypes = np.random.rand(len(genotypes))

    # The unused `binary` list of the original was dropped, and the map is
    # no longer named as if it were a file.
    gpm = GenotypePhenotypeMap(wildtype=wildtype,
                               genotypes=genotypes,
                               phenotypes=phenotypes,
                               log_transform=False,
                               mutations=mutations)
    return GenotypePhenotypeMSM(gpm)
def test_getters(test_data):
    """
    Test the simple getters, including the lazily populated neighbor ones.
    """
    for d in test_data:
        gpm = GenotypePhenotypeMap(
            wildtype=d["wildtype"],
            genotype=d["genotype"],
            phenotype=d["phenotype"],
            uncertainty=d["uncertainty"],
        )

        # Scalar properties.
        assert gpm.length == d["length"]
        assert gpm.num_genotypes == d["num_genotypes"]
        assert gpm.wildtype == d["wildtype"]
        assert gpm.mutant == d["mutant"]

        # .data and .encoding_table must hand back the underlying objects
        # themselves, *not* copies.
        assert gpm.data is gpm._data
        assert gpm.encoding_table is gpm._encoding_table

        # Per-site mutations.
        for i, site_mutations in enumerate(gpm.mutations):
            assert np.array_equal(site_mutations, d["mutations"][i])

        # Column getters.
        assert np.array_equal(gpm.genotype, d["genotype"])
        assert np.array_equal(gpm.binary, d["binary"])
        assert np.array_equal(gpm.phenotype, d["phenotype"])
        assert np.array_equal(gpm.uncertainty, d["uncertainty"])

        # Nothing neighbor-related is set before get_neighbors is called.
        assert gpm.neighbors is None
        assert gpm.neighbor_function is None
        assert gpm.neighbor_cutoff is None

        # A default call populates neighbors, neighbor_function and
        # neighbor_cutoff.
        gpm.get_neighbors()
        assert isinstance(gpm.neighbors, pd.DataFrame)
        assert gpm._neighbors is gpm.neighbors
        assert gpm.neighbor_function == "hamming"
        assert gpm.neighbor_cutoff == 1

        # A second call with an explicit cutoff updates the stored cutoff.
        gpm.get_neighbors(cutoff=17)
        assert isinstance(gpm.neighbors, pd.DataFrame)
        assert gpm._neighbors is gpm.neighbors
        assert gpm.neighbor_function == "hamming"
        assert gpm.neighbor_cutoff == 17
def test_node_list_sanity():
    """
    Check check.node_list_sanity against bad lists, good lists and bad graphs.
    """
    G = GenotypePhenotypeGraph()
    G.add_gpm(GenotypePhenotypeMap(genotypes=["AA", "AB", "AC"]))

    # Node lists that must be rejected for the three-node graph.
    rejected = [
        [-200, 300, 0],
        ["cow", "come", "home"],
        [-1, 0, 1],
        [[], [], 0],
        [0, 0, 0],
        [0, 1, 2, 3],
    ]
    for node_list in rejected:
        with pytest.raises(ValueError):
            check.node_list_sanity(node_list, G)

    # Node lists that must be accepted.
    for node_list in ([0, 1, 2], [0, 1], [0], [0, 2]):
        check.node_list_sanity(node_list, G)

    # A string or the graph class itself is not a valid graph argument.
    for not_a_graph in ("G", GenotypePhenotypeGraph):
        with pytest.raises(ValueError):
            check.node_list_sanity([0, 1, 2], not_a_graph)
def test_get_neighbors():
    """
    Check the edges produced by get_neighbors for three neighbor settings.

    Each setting is run with and without cython on a fresh four-genotype
    map. The observed edge list must match the expected one in length as
    well as content; the original element-wise loop alone passed vacuously
    when edges were missing.
    """
    genotype = ["SG", "PF", "SF", "PG"]

    # (neighbor function, cutoff, expected edges including self edges)
    cases = [
        ("hamming", 1,
         [(0, 0), (0, 2), (2, 0), (0, 3), (3, 0),
          (1, 1), (1, 2), (2, 1), (1, 3), (3, 1),
          (2, 2),
          (3, 3)]),
        ("hamming", 2,
         [(0, 0), (0, 1), (1, 0), (0, 2), (2, 0), (0, 3), (3, 0),
          (1, 1), (1, 2), (2, 1), (1, 3), (3, 1),
          (2, 2), (2, 3), (3, 2),
          (3, 3)]),
        ("codon", 1,
         [(0, 0), (0, 3), (3, 0),
          (1, 1), (1, 2), (2, 1),
          (2, 2),
          (3, 3)]),
    ]

    for use_cython in [False, True]:
        print("Use cython?", use_cython)

        for neighbor_function, cutoff, expected in cases:
            gpm = GenotypePhenotypeMap(genotype=list(genotype))
            gpm.get_neighbors(neighbor_function,
                              cutoff=cutoff,
                              use_cython=use_cython)

            observed = sorted(gpm._neighbors.loc[:, "edge"])
            wanted = sorted(expected)

            # Guard against missing or extra edges before comparing content.
            assert len(observed) == len(wanted)
            for want, got in zip(wanted, observed):
                assert np.array_equal(want, got)
def test_sample_genotypes():
    """
    Exercise sample_genotypes: argument validation, sample sizes and the
    keep_genotypes / keep_wt options.
    """
    gpm = GenotypePhenotypeMap(genotype=["SG", "PF", "SF", "PG"],
                               wildtype="SG")

    # num_to_return and fx_to_return: giving both, neither, or two
    # fractions is an error.
    bad_pairs = [(4, 1), (None, None), (0.5, 0.5)]
    for num, fx in bad_pairs:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(num_to_return=num, fx_to_return=fx)

    # Invalid num_to_return values.
    for v in [-1, "stupid", 5, [], {"test": 1}]:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(num_to_return=v)

    # Valid num_to_return values give a new map of exactly that size.
    for count in range(1, 5):
        x = gpm.sample_genotypes(num_to_return=count)
        assert x is not gpm
        assert len(x.data) == count

    # Invalid fx_to_return values.
    for v in [-1, "stupid", 1.1, [], {"test": 1}]:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(fx_to_return=v)

    # Valid fractions of the four genotypes give 1, 2, 3 and 4 rows.
    for expected_rows, fx in enumerate([0.25, 0.5, 0.75, 1.0], start=1):
        x = gpm.sample_genotypes(fx_to_return=fx)
        assert x is not gpm
        assert len(x.data) == expected_rows

    # A fraction below one genotype still returns a single row.
    x = gpm.sample_genotypes(fx_to_return=0.1)
    assert len(x.data) == 1

    # Invalid keep_genotypes values.
    bad_keep = ["stupid", 5, ["not", "really"], [[12], [15, 16]], np.zeros(5)]
    for b in bad_keep:
        with pytest.raises(ValueError):
            gpm.sample_genotypes(fx_to_return=1, keep_genotypes=b)

    # Asking to keep more genotypes than the sample holds is an error.
    with pytest.raises(ValueError):
        gpm.sample_genotypes(fx_to_return=0.25, keep_genotypes=["SG", "PF"])

    # A sample of two with two kept genotypes contains exactly those two.
    x = gpm.sample_genotypes(fx_to_return=0.5, keep_genotypes=["SF", "PG"])
    assert np.sum(x.data.genotype.isin(["SF", "PG"])) == 2

    # Two kept genotypes plus the wildtype do not fit in a sample of two ...
    with pytest.raises(ValueError):
        x = gpm.sample_genotypes(fx_to_return=0.5,
                                 keep_genotypes=["SF", "PG"],
                                 keep_wt=True)

    # ... but they do fit in a sample of three.
    x = gpm.sample_genotypes(fx_to_return=0.75,
                             keep_genotypes=["SF", "PG"],
                             keep_wt=True)
    assert np.sum(x.data.genotype.isin(["SG", "SF", "PG"])) == 3

    # A plain sample of three holds three distinct genotypes.
    x = gpm.sample_genotypes(fx_to_return=0.75)
    assert len(x.data) == 3
    assert len(np.unique(x.data.genotype)) == 3
def read_csv(cls, fname, wildtype, mutations=None):
    """Build an instance of ``cls`` from a csv file.

    The file is first loaded as a GenotypePhenotypeMap, forwarding
    ``wildtype`` and ``mutations``; that map is then passed to ``cls``.
    """
    return cls(
        GenotypePhenotypeMap.read_csv(
            fname,
            wildtype=wildtype,
            mutations=mutations,
        )
    )
def test__check_internal_consistency():
    """
    Test private _check_internal_consistency method.

    Each section changes a private attribute directly, checks that the raw
    ``_data`` frame is still stale, and then checks that the public getters
    return updated values.
    """
    start_binary = ["00", "01", "10", "11"]
    start_names = ["wildtype", "A1B", "A0B", "A0B/A1B"]

    def fresh_map():
        # Every section starts from an untouched two-site A/B map.
        return GenotypePhenotypeMap(["AA", "AB", "BA", "BB"])

    # --- Bypass the wildtype setter -------------------------------------
    gpm = fresh_map()
    assert np.array_equal(start_binary, gpm.binary)
    gpm._wildtype = "BB"
    flipped = ["11", "10", "01", "00"]
    assert np.array_equal(start_binary, gpm._data.loc[:, "binary"])
    assert np.array_equal(flipped, gpm.data.loc[:, "binary"])
    assert np.array_equal(flipped, gpm.binary)

    # --- Bypass the site_labels setter ----------------------------------
    gpm = fresh_map()
    assert np.array_equal(start_names, gpm.name)
    gpm._site_labels = [1, 2]
    relabeled = ["wildtype", "A2B", "A1B", "A1B/A2B"]
    assert np.array_equal(start_names, gpm._data.loc[:, "name"])
    assert np.array_equal(relabeled, gpm.data.loc[:, "name"])
    assert np.array_equal(relabeled, gpm.name)

    # --- Bypass the mutations setter ------------------------------------
    gpm = fresh_map()
    assert np.array_equal(start_binary, gpm.binary)
    gpm._mutations = [["A", "B", "C"], ["A", "B"]]
    widened = ["000", "001", "100", "101"]
    assert np.array_equal(start_binary, gpm._data.loc[:, "binary"])
    assert np.array_equal(widened, gpm.data.loc[:, "binary"])
    assert np.array_equal(widened, gpm.binary)

    # --- Introduce a state not previously seen --------------------------
    # Writing straight into gpm._data is not caught by gpm._data itself,
    # but the public getter reflects it.
    gpm = fresh_map()
    assert np.array_equal(start_binary, gpm.binary)
    gpm._data.loc[0, "genotype"] = "CA"
    assert np.array_equal(start_binary, gpm._data.loc[:, "binary"])
    assert np.array_equal(["010", "001", "100", "101"], gpm.binary)
def test_add_model(test_data):
    """
    Exercise GenotypePhenotypeGraph.add_model with built-in models, custom
    callables, and a range of invalid inputs.
    """
    population_sizes = [1, 3, 10, 30, 100, 300, 1000, 3000, 10000]

    for d in test_data:
        gpm = GenotypePhenotypeMap(genotypes=d["genotypes"],
                                   wildtype=d["wildtype"],
                                   phenotypes=d["phenotypes"],
                                   bad_pheno=d["phenotypes"])

        # Negate the extra column; every model except ratio should reject
        # negative phenotypes.
        gpm.data.loc[:, "bad_pheno"] = gpm.data.loc[:, "bad_pheno"] * -1

        G = GenotypePhenotypeGraph()

        # A model cannot be added before a map has been loaded.
        with pytest.raises(ValueError):
            G.add_model()

        # Load the map with basic hamming connectivity.
        G.add_gpm(gpm, "hamming", 1)

        # Models that need no extra arguments.
        for m in ["ratio", "sswm"]:
            G.add_model(column="phenotypes", model=m)

        # Models that need a population size, over several sizes.
        for m in ["moran", "mcclandish"]:
            for N in population_sizes:
                G.add_model(column="phenotypes", model=m, population_size=N)

        # Negative data is rejected by sswm ...
        with pytest.raises(ValueError):
            G.add_model(column="bad_pheno", model="sswm")

        # ... but accepted by ratio.
        G.add_model(column="bad_pheno", model="ratio")

        # Calls that must all be rejected with ValueError.
        rejected_calls = [
            # string data
            dict(column="genotypes", model="sswm"),
            # required argument missing
            dict(column="phenotypes", model="mcclandish"),
            # nonsensical value for the required argument
            dict(column="phenotypes", model="mcclandish", population_size=-5),
            # undefined model
            dict(column="phenotypes", model="not_a_real_model"),
            # argument the model does not recognize
            dict(column="phenotypes", model="sswm", population_size=10),
        ]
        for call_kwargs in rejected_calls:
            with pytest.raises(ValueError):
                G.add_model(**call_kwargs)

        # A callable with an incompatible signature is rejected.
        def bad_model(v1):
            return 5

        with pytest.raises(ValueError):
            G.add_model(column="phenotypes", model=bad_model)

        # A callable taking only the two float arguments is accepted.
        def okay_model(v1, v2):
            return 5

        G.add_model(column="phenotypes", model=okay_model)

        # A callable taking the two floats plus a custom keyword is accepted.
        def okay_model(v1, v2, needed_kwarg):
            return 5

        G.add_model(column="phenotypes", model=okay_model, needed_kwarg=5)

        # That model always returns 5, so every edge prob is known.
        for e in G.edges:
            assert G.edges[e]["prob"] == 5

        # No model given: every prob should be 1.
        G.add_model(column="phenotypes")
        for e in G.edges:
            assert G.edges[e]["prob"] == 1

        # Nothing given at all: every prob should be 1.
        G.add_model()
        for e in G.edges:
            assert G.edges[e]["prob"] == 1

        # A model without data: prob should be 0 for sswm because fitness
        # is the same for all genotypes.
        G.add_model(model="sswm")
        for e in G.edges:
            assert G.edges[e]["prob"] == 0
def load(name):
    """Load the provided dataset called ``name``.

    The dataset is read from "<name>.json", located via
    ``get_dataset_path``, and returned as a GenotypePhenotypeMap.
    """
    dataset_path = get_dataset_path(f"{name}.json")
    return GenotypePhenotypeMap.read_json(dataset_path)
def read_json(cls, fname):
    """
    Build an instance of ``cls`` from a json file.

    The file is first loaded as a GenotypePhenotypeMap, which is then
    passed to ``cls``.
    """
    return cls(GenotypePhenotypeMap.read_json(fname))
def test_wildtype_setter(test_data):
    """
    Test the wildtype setter, which triggers recalculation of the binary map.
    """
    for d in test_data:
        gpm = GenotypePhenotypeMap(wildtype=d["wildtype"],
                                   genotype=d["genotype"])

        # Setting the same wildtype again must change nothing.
        gpm.wildtype = d["wildtype"]
        assert np.array_equal(gpm.binary, d["binary"])
        assert gpm.mutant == d["mutant"]

        # Wrong length.
        with pytest.raises(ValueError):
            gpm.wildtype = "1"

        # Not iterable.
        with pytest.raises(ValueError):
            gpm.wildtype = 0

        # Empty.
        with pytest.raises(ValueError):
            gpm.wildtype = []

        # Right length, but states never seen in the test datasets.
        with pytest.raises(ValueError):
            gpm.wildtype = "Z" * len(gpm.wildtype)

        # Wildtype given as a list of characters instead of a str.
        with pytest.raises(ValueError):
            gpm.wildtype = list(gpm.wildtype)

        # Switching to the mutant is valid and must take effect.
        gpm.wildtype = d["mutant"]
        assert gpm.wildtype == d["mutant"]

        # The new mutant must differ from the new wildtype at every
        # variable site.
        for i, site_mutations in enumerate(d["mutations"][:len(gpm.wildtype)]):
            if len(site_mutations) != 1:
                assert gpm.mutant[i] != gpm.wildtype[i]

        # n_mutations must count differences from the new wildtype.
        for i, geno in enumerate(gpm.genotype):
            counts = sum(1 for j in range(len(geno))
                         if geno[j] != gpm.wildtype[j])
            assert gpm.n_mutations[i] == counts

        # The binary must have been recalculated: it matches the current
        # encoding table and differs from the original. Accuracy of the
        # encoding itself is not checked here; that check is done in the
        # test_utils.test_genotypes_to_binary function.
        binary = utils.genotypes_to_binary(gpm.genotype, gpm.encoding_table)
        assert np.array_equal(binary, gpm.binary)
        assert not np.array_equal(binary, d["binary"])
Use a linear, logistic regression model to estimate the positive/negative effects of mutations. """ # Imports import matplotlib.pyplot as plt from gpmap import GenotypePhenotypeMap from epistasis.models import EpistasisLogisticRegression from epistasis.pyplot import plot_coefs # The data wildtype = "000" genotypes = ['000', '001', '010', '011', '100', '101', '110', '111'] phenotypes = [0.366, -0.593, 1.595, -0.753, 0.38, 1.296, 1.025, -0.519] gpm = GenotypePhenotypeMap(wildtype, genotypes, phenotypes) # Threshold threshold = 1.0 # Initialize a model model = EpistasisLogisticRegression(threshold=threshold) model.add_gpm(gpm) # Fit the model model.fit() fig, ax = plot_coefs(model, figsize=(1, 3)) plt.show()