Ejemplo n.º 1
0
def test_dress_rehearsal(tmp_path):
    """Exercise the scan -> write -> reload -> cluster/benchmark chain once.

    A ``WilsonScanner`` run fills a ``Data`` object, which is written to an
    SQL file below ``tmp_path``.  The file is loaded again as
    ``DataWithErrors``, errors are added, and both ``HierarchyCluster`` and
    ``Benchmark`` are run on it with ``chi2_metric``.
    """
    # Can remove str casting once we remove py3.5 support
    sql_path = str(tmp_path / "dress_rehearsal.sql")

    scanner = WilsonScanner(scale=5, eft="WET", basis="flavio")
    scanner.set_dfunction(
        random_kinematics,
        sampling=np.linspace(0.0, 1.0, 10),
        normalize=True,
    )
    scanner.set_no_workers(no_workers=1)
    coefficient_grid = {
        "CVL_bctaunutau": (-0.5, 0.5, 3),
        "CSL_bctaunutau": (-0.5, 0.5, 3),
        "CT_bctaunutau": (-0.1, 0.1, 3),
    }
    scanner.set_spoints_equidist(coefficient_grid)

    scanned = Data()
    scanner.run(scanned).write()
    scanned.write(sql_path, overwrite="overwrite")

    # Reload what was just written, this time with error support.
    data = DataWithErrors(sql_path)
    data.add_rel_err_uncorr(0.01)
    data.add_err_poisson(1000)

    clusterer = HierarchyCluster()
    clusterer.set_metric(chi2_metric)
    clusterer.set_max_d(1)

    benchmark = Benchmark()
    benchmark.set_metric(chi2_metric)

    clusterer.run(data).write()
    benchmark.run(data).write()
def test_reuse_hierarchy(_data):
    """A run that reuses an earlier hierarchy must give the same clusters.

    The first result is written with the default arguments and read back
    from column ``"cluster"``; the second result, obtained with
    ``reuse_hierarchy_from``, is written to column ``"reused"``.
    """
    data = _data.copy()

    clusterer = HierarchyCluster()
    clusterer.set_metric("euclidean")
    clusterer.set_max_d(1.5)

    first = clusterer.run(data)
    first.write()

    second = clusterer.run(data, reuse_hierarchy_from=first)
    second.write(cluster_column="reused")

    fresh_labels = data.df["cluster"].tolist()
    reused_labels = data.df["reused"].tolist()
    assert fresh_labels == reused_labels
def test_reuse_hierarchy_fail_different_data(_data):
    """Reusing a hierarchy on another data object must raise ``ValueError``.

    The hierarchy is built on one copy of ``_data`` and then passed to a run
    on a second, separate copy.
    """
    clustered = _data.copy()
    other = _data.copy()

    clusterer = HierarchyCluster()
    clusterer.set_metric("euclidean")
    clusterer.set_max_d(1.5)

    hierarchy_result = clusterer.run(clustered)
    hierarchy_result.write()

    with pytest.raises(ValueError, match=".*different data object.*"):
        clusterer.run(other, reuse_hierarchy_from=hierarchy_result)
 def test_reuse_hierarchy(self):
     """A run that reuses an earlier hierarchy must give the same clusters.

     The first result is written with the default arguments and read back
     from column ``"cluster"``; the second result, obtained with
     ``reuse_hierarchy_from``, is written to column ``"reused"``.
     """
     data = self.d.copy()

     clusterer = HierarchyCluster()
     clusterer.set_metric("euclidean")
     clusterer.set_max_d(1.5)

     first = clusterer.run(data)
     first.write()

     second = clusterer.run(data, reuse_hierarchy_from=first)
     second.write(cluster_column="reused")

     fresh_labels = data.df["cluster"].tolist()
     reused_labels = data.df["reused"].tolist()
     self.assertListEqual(fresh_labels, reused_labels)
 def test_reuse_hierarchy_fail_different_data(self):
     """Reusing a hierarchy on another data object must raise ``ValueError``.

     The hierarchy is built on one copy of ``self.d`` and then passed to a
     run on a second, separate copy.  The error message is expected to
     contain ``"different data object"``.
     """
     d = self.d.copy()
     e = self.d.copy()
     c = HierarchyCluster()
     c.set_metric("euclidean")
     c.set_max_d(1.5)
     r = c.run(d)
     r.write()
     with self.assertRaises(ValueError) as ex:
         c.run(e, reuse_hierarchy_from=r)
     # assertIn shows both the expected fragment and the actual message on
     # failure, whereas assertTrue(... in ...) only says "False is not true".
     self.assertIn("different data object", str(ex.exception))
def test_cluster(_data):
    """Cluster the same data at two ``max_d`` values and check the counts.

    The run with ``max_d = 0.75`` is written with the default arguments, the
    run with ``max_d = 1.5`` is written to column ``"cluster15"``.
    """
    data = _data.copy()
    clusterer = HierarchyCluster()
    clusterer.set_metric("euclidean")

    passes = (
        (0.75, {}),
        (1.5, {"cluster_column": "cluster15"}),
    )
    for max_d, write_kwargs in passes:
        clusterer.set_max_d(max_d)
        clusterer.run(data).write(**write_kwargs)

    # No two of our distributions are closer than 1, so with the smaller
    # cut every distribution ends up in a cluster of its own.
    n_default_clusters = len(data.clusters())
    assert n_default_clusters == data.n

    # Somewhat unfortunate: many distribution pairs are at equal distance,
    # so which clusters form depends on the implementation of the
    # algorithm. This is simply the value observed so far:
    n_clusters15 = len(data.clusters(cluster_column="cluster15"))
    assert n_clusters15 == 6
def test_dendrogram_plot(_data, tmp_path):
    """Check that a dendrogram can be drawn to a PDF below ``tmp_path``.

    The clusterer uses the metric selected by ``set_metric()`` without
    arguments and runs on ``_data`` directly (no copy is made).
    """
    pdf_path = str(tmp_path / "output.pdf")

    clusterer = HierarchyCluster()
    clusterer.set_metric()
    clusterer.set_max_d(0.2)

    result = clusterer.run(_data)
    result.dendrogram(output=pdf_path)
def test_hierarchy_cluster_no_max_d(_data):
    """Running without a prior ``set_max_d`` call must raise ``ValueError``.

    The error message is expected to mention ``set_max_d``.
    """
    data = _data.copy()
    clusterer = HierarchyCluster()

    with pytest.raises(ValueError, match=".*set_max_d.*"):
        clusterer.run(data)
 def test_dendrogram_plot(self):
     """Check that a dendrogram can be produced from a clustering result.

     The clusterer uses the metric selected by ``set_metric()`` without
     arguments and runs on ``self.d`` directly (no copy is made).
     """
     clusterer = HierarchyCluster()
     clusterer.set_metric()
     clusterer.set_max_d(0.2)

     result = clusterer.run(self.d)
     result.dendrogram()
 def test_hierarchy_cluster_no_max_d(self):
     """Running without a prior ``set_max_d`` call must raise ``ValueError``.

     The error message is expected to mention ``set_max_d``.
     """
     d = self.d.copy()
     c = HierarchyCluster()
     with self.assertRaises(ValueError) as e:
         c.run(d)
     # assertIn shows both the expected fragment and the actual message on
     # failure, whereas assertTrue(... in ...) only says "False is not true".
     self.assertIn("set_max_d", str(e.exception))