def test_threshold_match_children_2d():
    """Populations from a 2D gate are renamed after the child they match.

    Two children, "positive" and "negative", are registered on the gate,
    each with a comma-separated definition. Two populations built with a
    slightly different x threshold (0.6 rather than 0.5) are then handed to
    ``_match_to_children``; the returned populations are expected to carry
    the name of the corresponding child.
    """
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        y="Y",
        method="density",
    )
    x_values = np.random.normal(loc=1., scale=1.5, size=1000)
    y_values = np.random.normal(loc=1., scale=1.5, size=1000)
    data = pd.DataFrame({"X": x_values, "Y": y_values})

    for child_name, child_definition in (
        ("positive", "++,+-"),
        ("negative", "--,-+"),
    ):
        threshold.add_child(
            gate.ChildThreshold(
                name=child_name,
                definition=child_definition,
                geom=ThresholdGeom(x_threshold=0.5),
            )
        )

    at_or_above_cut = data[data.X >= 0.6].index.values
    below_cut = data[data.X < 0.6].index.values
    # The positive population only carries "++" while the "positive" child is
    # defined as "++,+-"; the test still expects it to be named "positive".
    pos = gate.Population(
        population_name="p1",
        parent="root",
        definition="++",
        geom=ThresholdGeom(x_threshold=0.6),
        index=at_or_above_cut,
    )
    neg = gate.Population(
        population_name="p2",
        parent="root",
        definition="--,-+",
        geom=ThresholdGeom(x_threshold=0.6),
        index=below_cut,
    )

    pops = threshold._match_to_children([neg, pos])

    matched_pos = next(p for p in pops if p.definition == "++")
    assert matched_pos.population_name == "positive"
    matched_neg = next(p for p in pops if p.definition == "--,-+")
    assert matched_neg.population_name == "negative"
def test_threshold_match_children_1d():
    """Populations from a 1D gate are renamed after the child they match.

    A "+" and a "-" child are registered on the gate. Two populations with
    the same definitions (but an x threshold of 0.6 rather than 0.5) are
    passed to ``_match_to_children``; the returned populations are expected
    to be named "positive" and "negative" respectively.
    """
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        method="density",
    )
    data = np.random.normal(loc=1., scale=1.5, size=1000)
    threshold.add_child(
        gate.ChildThreshold(
            name="positive",
            definition="+",
            geom=ThresholdGeom(x_threshold=0.5),
        )
    )
    threshold.add_child(
        gate.ChildThreshold(
            name="negative",
            definition="-",
            geom=ThresholdGeom(x_threshold=0.5),
        )
    )

    # ``index`` holds event positions (not the sampled values), mirroring the
    # 2D variant of this test which passes DataFrame row indices.
    pos = gate.Population(
        population_name="p1",
        parent="root",
        definition="+",
        geom=ThresholdGeom(x_threshold=0.6),
        index=np.where(data >= 0.6)[0],
    )
    # The negative population must cover the events *below* the threshold;
    # previously it reused the ">= 0.6" mask of the positive population.
    neg = gate.Population(
        population_name="p2",
        parent="root",
        definition="-",
        geom=ThresholdGeom(x_threshold=0.6),
        index=np.where(data < 0.6)[0],
    )

    pops = threshold._match_to_children([neg, pos])

    pos = [p for p in pops if p.definition == "+"][0]
    assert pos.population_name == "positive"
    neg = [p for p in pops if p.definition == "-"][0]
    assert neg.population_name == "negative"
def test_threshold_add_child_invalid_1d(d):
    """Adding a child with an invalid definition to a 1D gate must fail.

    Parameters
    ----------
    d
        Child definition under test. NOTE(review): presumably supplied by a
        ``pytest.mark.parametrize`` decorator or a fixture that is not
        visible in this part of the file -- confirm.
    """
    expected_message = "Invalid child definition, should be either '+' or '-'"
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        method="manual",
        x="X",
    )
    geom = ThresholdGeom(x="X", x_threshold=0.56, y_threshold=0.75)
    child = gate.ChildThreshold(name="test child", definition=d, geom=geom)

    with pytest.raises(AssertionError) as err:
        threshold.add_child(child)
    # Checked after the ``with`` block: statements following the raising call
    # inside the block would never execute.
    assert str(err.value) == expected_message
def test_threshold_predict_2d():
    """``predict`` on a fitted 2D gate yields four quadrant populations.

    The gate is fitted on three blobs, then applied to a new data set made
    of two blobs. Each returned population must be a ``gate.Population``
    with a ``ThresholdGeom`` mirroring the gate's axes and transformations,
    and its index must equal the rows of the new data falling in the
    quadrant described by its definition.
    """
    train_blobs, _ = make_blobs(
        n_samples=3000,
        n_features=2,
        centers=[(1., 1.), (1., 5.), (5., 0.2)],
        random_state=42,
    )
    data = pd.DataFrame({"X": train_blobs[:, 0], "Y": train_blobs[:, 1]})
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        y="Y",
        method="density",
    )
    threshold.fit(data=data)

    test_blobs, _ = make_blobs(
        n_samples=3000,
        n_features=2,
        centers=[(1., 1.), (5., 0.2)],
        random_state=42,
    )
    new_data = pd.DataFrame({"X": test_blobs[:, 0], "Y": test_blobs[:, 1]})
    pops = threshold.predict(new_data)

    assert len(pops) == 4
    assert all(isinstance(p, gate.Population) for p in pops)
    assert all(isinstance(p.geom, ThresholdGeom) for p in pops)
    assert all(p.geom.x == threshold.x for p in pops)
    assert all(p.geom.y == threshold.y for p in pops)
    expected_transform_x = threshold.transformations.get("x")
    expected_transform_y = threshold.transformations.get("y")
    assert all(p.geom.transform_x == expected_transform_x for p in pops)
    assert all(p.geom.transform_y == expected_transform_y for p in pops)
    found_definitions = [p.definition for p in pops]
    assert all(d in found_definitions for d in ["++", "--", "-+", "+-"])

    # Thresholds are read from the first child of the fitted gate.
    x_cut = threshold.children[0].geom.x_threshold
    y_cut = threshold.children[0].geom.y_threshold
    x_low, x_high = new_data.X < x_cut, new_data.X >= x_cut
    y_low, y_high = new_data.Y < y_cut, new_data.Y >= y_cut
    expected_index = {
        "--": new_data[x_low & y_low].index.values,
        "++": new_data[x_high & y_high].index.values,
        "-+": new_data[x_low & y_high].index.values,
        "+-": new_data[x_high & y_low].index.values,
    }

    for definition, expected in expected_index.items():
        population = next(p for p in pops if p.definition == definition)
        assert np.array_equal(population.index, expected)
def test_threshold_fit_1d():
    """Fitting a 1D density gate creates a "+" and a "-" child.

    The data is a seeded mixture of three normal distributions. After
    fitting, both children must share one x threshold that rounds to 4.
    """
    np.random.seed(42)
    # The list literal is evaluated left to right, so the random stream is
    # consumed in a fixed order.
    components = [
        np.random.normal(loc=0.2, scale=1, size=500),
        np.random.normal(loc=2.5, scale=0.2, size=250),
        np.random.normal(loc=6.5, scale=0.5, size=500),
    ]
    data = pd.DataFrame({"X": np.hstack(components)})
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        method="density",
    )
    threshold.fit(data=data)

    assert len(threshold.children) == 2
    first_cut = threshold.children[0].geom.x_threshold
    second_cut = threshold.children[1].geom.x_threshold
    assert first_cut == second_cut
    assert round(threshold.children[0].geom.x_threshold) == 4
    child_definitions = [c.definition for c in threshold.children]
    assert all(d in child_definitions for d in ["+", "-"])
def test_threshold_add_child():
    """``add_child`` stores the child and fills in its geometry from the gate.

    After adding a single "++" child to a 2D manual gate that only defines a
    transformation for x, the gate must hold exactly one child whose geom
    has the gate's ``x``/``y``, ``transform_x == "logicle"`` and a falsy
    ``transform_y``.
    """
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        y="Y",
        method="manual",
        transformations={"x": "logicle"},
    )
    child = gate.ChildThreshold(
        name="test child",
        definition="++",
        geom=ThresholdGeom(x_threshold=0.56, y_threshold=0.75),
    )
    threshold.add_child(child)

    # Pin the exact count: a bare ``assert len(...)`` would also pass if the
    # child had been added more than once.
    assert len(threshold.children) == 1
    added = threshold.children[0]
    assert added.geom.x == threshold.x
    assert added.geom.y == threshold.y
    assert added.geom.transform_x == "logicle"
    assert not added.geom.transform_y
def test_threshold_fit_2d():
    """Fitting a 2D density gate creates one child per quadrant.

    The gate is fitted on three blobs. All four children must share a
    single x threshold and a single y threshold, both lying strictly
    between 2 and 4.
    """
    blobs, _ = make_blobs(
        n_samples=3000,
        n_features=2,
        centers=[(1., 1.), (1., 5.), (5., 0.2)],
        random_state=42,
    )
    data = pd.DataFrame({"X": blobs[:, 0], "Y": blobs[:, 1]})
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        y="Y",
        method="density",
    )
    threshold.fit(data)

    children = threshold.children
    assert len(children) == 4
    assert len({c.geom.x_threshold for c in children}) == 1
    assert len({c.geom.y_threshold for c in children}) == 1
    child_definitions = [c.definition for c in children]
    assert all(d in child_definitions for d in ["++", "--", "+-", "-+"])
    assert 2 < children[0].geom.x_threshold < 4
    assert 2 < children[0].geom.y_threshold < 4
def test_threshold_fit_predict_2d():
    """``fit_predict`` on a labelled 2D gate merges quadrants sharing a name.

    The gate is fitted on four blobs and its children are labelled so that
    "++" becomes "Top left" and the three remaining quadrants become
    "Other". ``fit_predict`` on a new three-blob data set is then expected
    to return exactly two populations: "Top left" with definition "++" and
    "Other" whose comma-separated definition covers the other quadrants.
    """
    train_blobs, _ = make_blobs(
        n_samples=4000,
        n_features=2,
        centers=[(1., 1.), (1., 7.), (7., 2.), (7., 6.2)],
        random_state=42,
    )
    data = pd.DataFrame({"X": train_blobs[:, 0], "Y": train_blobs[:, 1]})
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        y="Y",
        method="density",
    )
    threshold.fit(data)
    labels = {"++": "Top left", "--": "Other", "-+": "Other", "+-": "Other"}
    threshold.label_children(labels)

    new_blobs, _ = make_blobs(
        n_samples=3000,
        n_features=2,
        centers=[(1., 1.), (1., 7.), (7., 6.2)],
        random_state=42,
    )
    data = pd.DataFrame({"X": new_blobs[:, 0], "Y": new_blobs[:, 1]})
    pops = threshold.fit_predict(data=data)

    assert len(pops) == 2
    assert all(isinstance(p, gate.Population) for p in pops)
    assert all(isinstance(p.geom, ThresholdGeom) for p in pops)
    assert all(p.geom.x == threshold.x for p in pops)
    assert all(p.geom.y == threshold.y for p in pops)
    expected_transform_x = threshold.transformations.get("x")
    expected_transform_y = threshold.transformations.get("y")
    assert all(p.geom.transform_x == expected_transform_x for p in pops)
    assert all(p.geom.transform_y == expected_transform_y for p in pops)

    top_left = next(p for p in pops if p.population_name == "Top left")
    other = next(p for p in pops if p.population_name == "Other")
    assert top_left.definition == "++"
    assert set(other.definition.split(",")) == {"+-", "-+", "--"}
    assert len(top_left.index) < len(other.index)
    assert len(top_left.index) > 900
    assert len(other.index) > 1900
def test_threshold_fit_predict_1d():
    """``fit_predict`` on a labelled 1D gate returns named +/- populations.

    The gate is fitted on a mixture of three normal distributions and its
    children are labelled "Positive" and "Negative". ``fit_predict`` on a
    new two-component data set (200 low events, 1000 high events) must
    return two populations with those names, the positive one holding more
    than 800 events and the negative one fewer than 300.
    """
    # Seed the global NumPy RNG (as test_threshold_fit_1d does) so the
    # size-bound assertions below run on reproducible data.
    np.random.seed(42)
    n1 = np.random.normal(loc=0.2, scale=1, size=500)
    n2 = np.random.normal(loc=2.5, scale=0.2, size=250)
    n3 = np.random.normal(loc=6.5, scale=0.5, size=500)
    data = pd.DataFrame({"X": np.hstack([n1, n2, n3])})
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        method="density",
    )
    threshold.fit(data=data)
    threshold.label_children({"+": "Positive", "-": "Negative"})

    new_data = pd.DataFrame({
        "X": np.hstack([
            np.random.normal(loc=0.2, scale=1, size=200),
            np.random.normal(loc=6.5, scale=0.5, size=1000),
        ])
    })
    pops = threshold.fit_predict(new_data)

    assert len(pops) == 2
    assert all(isinstance(p, gate.Population) for p in pops)
    assert all(isinstance(p.geom, ThresholdGeom) for p in pops)
    assert all(p.geom.x == threshold.x for p in pops)
    assert all(p.geom.y == threshold.y for p in pops)
    assert all(p.geom.transform_x == threshold.transformations.get("x")
               for p in pops)
    assert all(p.geom.transform_y == threshold.transformations.get("y")
               for p in pops)
    assert all(i in [p.definition for p in pops] for i in ["+", "-"])

    pos_pop = [p for p in pops if p.definition == "+"][0]
    assert pos_pop.population_name == "Positive"
    neg_pop = [p for p in pops if p.definition == "-"][0]
    assert neg_pop.population_name == "Negative"
    assert len(pos_pop.index) > len(neg_pop.index)
    assert len(pos_pop.index) > 800
    assert len(neg_pop.index) < 300
def test_threshold_predict_1d():
    """``predict`` on a fitted 1D gate splits new data at the fitted threshold.

    The gate is fitted on a mixture of three normal distributions and then
    applied to a new two-component data set. The "+" population must index
    the rows at or above the fitted x threshold and the "-" population the
    rows below it.
    """
    # The list literal is evaluated left to right, so the random stream is
    # consumed in a fixed order.
    training_parts = [
        np.random.normal(loc=0.2, scale=1, size=500),
        np.random.normal(loc=2.5, scale=0.2, size=250),
        np.random.normal(loc=6.5, scale=0.5, size=500),
    ]
    data = pd.DataFrame({"X": np.hstack(training_parts)})
    threshold = gate.ThresholdGate(
        gate_name="test",
        parent="test parent",
        x="X",
        method="density",
    )
    threshold.fit(data=data)

    new_parts = [
        np.random.normal(loc=0.2, scale=1, size=500),
        np.random.normal(loc=6.5, scale=0.5, size=500),
    ]
    new_data = pd.DataFrame({"X": np.hstack(new_parts)})
    pops = threshold.predict(new_data)

    assert len(pops) == 2
    assert all(isinstance(p, gate.Population) for p in pops)
    assert all(isinstance(p.geom, ThresholdGeom) for p in pops)
    assert all(p.geom.x == threshold.x for p in pops)
    assert all(p.geom.y == threshold.y for p in pops)
    expected_transform_x = threshold.transformations.get("x")
    expected_transform_y = threshold.transformations.get("y")
    assert all(p.geom.transform_x == expected_transform_x for p in pops)
    assert all(p.geom.transform_y == expected_transform_y for p in pops)
    found_definitions = [p.definition for p in pops]
    assert all(d in found_definitions for d in ["+", "-"])

    # The threshold is read from the first child of the fitted gate.
    x_cut = threshold.children[0].geom.x_threshold
    expected_index = {
        "-": new_data[new_data.X < x_cut].index.values,
        "+": new_data[new_data.X >= x_cut].index.values,
    }
    for definition, expected in expected_index.items():
        population = next(p for p in pops if p.definition == definition)
        assert np.array_equal(population.index, expected)