Ejemplo n.º 1
0
 def test(self):
     """Run ``TrivialClusterMatcher`` on two relabelled cluster columns.

     The expected dict pairs each label of the second frame with the label
     the first frame carries on the same rows.
     """
     reference = Data()
     relabelled = Data()
     reference.df = pd.DataFrame({"cluster": [1, 1, 2, 2, 3]})
     relabelled.df = pd.DataFrame({"cluster": [2, 2, 3, 3, 1]})

     match_result = TrivialClusterMatcher().run(reference, relabelled)

     expected_renaming = {2: 1, 3: 2, 1: 3}
     self.assertDictEqual(match_result.rename_dct, expected_renaming)
Ejemplo n.º 2
0
 def setUp(self):
     """Create the fixtures ``self.d1`` to ``self.d4``.

     Each one is a ``Data()`` whose ``df`` is a data frame with a single
     five-entry ``cluster`` column.
     """
     fixtures = (
         ("d1", [1, 1, 2, 2, 3]),
         ("d2", [2, 2, 3, 3, 1]),
         ("d3", [2, 1, 2, 2, 3]),
         ("d4", [4, 1, 2, 2, 3]),
     )
     for attribute, labels in fixtures:
         data = Data()
         data.df = pd.DataFrame({"cluster": labels})
         setattr(self, attribute, data)
Ejemplo n.º 3
0
def test_dress_rehearsal(tmp_path):
    """Run the whole chain once: scan, write, reload, cluster, benchmark.

    Args:
        tmp_path: Directory in which ``dress_rehearsal.sql`` is written; it
            is combined with the file name via the ``/`` operator.
    """
    scanner = WilsonScanner(scale=5, eft="WET", basis="flavio")
    scanner.set_dfunction(
        random_kinematics,
        sampling=np.linspace(0.0, 1.0, 10),
        normalize=True,
    )
    scanner.set_no_workers(no_workers=1)

    scan_ranges = {
        "CVL_bctaunutau": (-0.5, 0.5, 3),
        "CSL_bctaunutau": (-0.5, 0.5, 3),
        "CT_bctaunutau": (-0.1, 0.1, 3),
    }
    scanner.set_spoints_equidist(scan_ranges)

    scanned = Data()
    scanner.run(scanned).write()

    # Can remove str casting once we remove py3.5 support
    sql_path = str(tmp_path / "dress_rehearsal.sql")
    scanned.write(sql_path, overwrite="overwrite")

    # Reload what was just written, this time with uncertainties attached.
    with_errors = DataWithErrors(sql_path)
    with_errors.add_rel_err_uncorr(0.01)
    with_errors.add_err_poisson(1000)

    clusterer = HierarchyCluster()
    clusterer.set_metric(chi2_metric)
    clusterer.set_max_d(1)

    benchmark = Benchmark()
    benchmark.set_metric(chi2_metric)

    clusterer.run(with_errors).write()
    benchmark.run(with_errors).write()
    def load(cls,
             directory: Union[str, PurePath],
             loader: Optional[Callable] = None) -> "NoisySampleResult":
        """Load from output directory

        Every file matching ``data_*.sql`` directly inside ``directory`` is
        loaded, in sorted path order, and becomes one sample of the result.

        Args:
            directory: Path to directory to load from
            loader: Function used to load data (optional). It is called with
                the path of each matching file and its return value is used
                as the sample. If not given, ``Data(path)`` is used.

        Returns:
            New instance of the class this method is called on, built from
            the loaded samples.

        Raises:
            FileNotFoundError: If ``directory`` does not exist or is not a
                directory.

        Example:

        .. code-block:: python

            def loader(path):
                d = clusterking.DataWithErrors(path)
                d.add_rel_err_uncorr(0.01)
                return d

            nsr = NoisySampleResult.load("/path/to/dir/", loader=loader)

        """
        directory = Path(directory)
        if not directory.is_dir():
            raise FileNotFoundError(
                "{} does not exist or is not a directory".format(directory))
        # Pick the loading callable once instead of branching for every file.
        load_sample = Data if loader is None else loader
        samples = [load_sample(path)
                   for path in sorted(directory.glob("data_*.sql"))]
        # Construct through ``cls`` so that calling ``load`` on a subclass
        # returns an instance of that subclass rather than the base class.
        return cls(samples=samples)
Ejemplo n.º 5
0
 def setUp(self):
     """Set up a configured ``WilsonScanner`` as ``self.s`` and a fresh
     ``Data()`` as ``self.d``."""
     coefficients = ("CVL_bctaunutau", "CSL_bctaunutau", "CT_bctaunutau")
     scanner = WilsonScanner(scale=5, eft="WET", basis="flavio")
     # All three coefficients share the same (-1, 1, 2) specification.
     scanner.set_spoints_equidist(
         {name: (-1, 1, 2) for name in coefficients}
     )
     scanner.set_dfunction(simple_func, binning=[0, 1, 2], normalize=True)
     self.s = scanner
     self.d = Data()
Ejemplo n.º 6
0
 def test_run_identity(self):
     """Scan parameter ``a`` with ``func_identity``, check the resulting
     columns and values, then write the data to the temporary directory."""
     scanner = Scanner()
     data = Data()
     scanner.set_spoints_equidist({"a": (0, 1, 2)})
     scanner.set_dfunction(func_identity)
     scanner.run(data).write()

     column_names = sorted(data.df.columns)
     self.assertEqual(column_names, ["a", "bin0"])

     expected_values = np.array([[0.0, 0.0], [1.0, 1.0]])
     self.assertAllClose(data.df.values, expected_values)

     target = Path(self.tmpdir.name) / "test.sql"
     data.write(target)
Ejemplo n.º 7
0
 def test_run_simple_bins_singlecore(self):
     """Scan parameter ``a`` with ``func_zero_bins`` over two bins using a
     single worker, check columns and values, then write the data."""
     scanner = Scanner()
     data = Data()
     scanner.set_spoints_equidist({"a": (0, 1, 2)})
     scanner.set_dfunction(func_zero_bins, binning=[0, 1, 2])
     scanner.set_no_workers(1)
     scanner.run(data).write()

     column_names = sorted(data.df.columns)
     self.assertEqual(column_names, ["a", "bin0", "bin1"])

     expected_values = np.array([
         [0.0, 0.0, 0.0],
         [1.0, 1.0, 1.0],
     ])
     self.assertAllClose(data.df.values, expected_values)

     target = Path(self.tmpdir.name) / "test.sql"
     data.write(target)
 def test_sss(self):
     """Run ``SubSampleStabilityTester`` with a two-cluster k-means on the
     result of a small single-worker scan."""
     data = Data()

     scanner = Scanner()
     scanner.set_no_workers(1)
     scanner.set_spoints_equidist({"a": (0, 1, 4)})
     scanner.set_dfunction(func_one)
     scanner.run(data).write()

     kmeans = KmeansCluster()
     kmeans.set_kmeans_options(n_clusters=2)

     tester = SubSampleStabilityTester()
     tester.set_sampling(frac=0.95)
     tester.set_repeat(2)
     tester.run(data=data, cluster=kmeans)
Ejemplo n.º 9
0
 def test_run_simple_bins_sample(self):
     """Scan parameter ``a`` with ``func_sum_indentity_x`` sampled at three
     points, check columns and values, then write the data."""
     scanner = Scanner()
     data = Data()
     scanner.set_spoints_equidist({"a": (0, 2, 3)})
     scanner.set_dfunction(func_sum_indentity_x, sampling=[0, 1, 2])
     scanner.run(data).write()

     expected_columns = ["a", "bin0", "bin1", "bin2"]
     self.assertEqual(sorted(data.df.columns), expected_columns)

     # The values are printed before they are checked.
     print(data.df.values)
     expected_values = np.array([
         [0.0, 0.0, 0.0, 0.0],
         [1.0, 0.0, 1.0, 2.0],
         [2.0, 0.0, 2.0, 4.0],
     ])
     self.assertAllClose(data.df.values, expected_values)

     target = Path(self.tmpdir.name) / "test.sql"
     data.write(target)
    def test_noisy_sample(self):
        """Generate noisy samples, round-trip them through ``write`` and
        ``NoisySampleResult.load``, then run the stability tester on them."""
        data = Data()

        scanner = Scanner()
        scanner.set_no_workers(1)
        scanner.set_spoints_equidist({"a": (0, 1, 2)})
        scanner.set_dfunction(func_zero)

        sampler = NoisySample()
        sampler.set_repeat(1)
        sampler.set_noise("gauss", mean=0.0, sigma=1 / 30 / 4)

        result = sampler.run(scanner=scanner, data=data)
        self.assertEqual(len(result.samples), 2)

        # Write to the temporary directory and read it straight back.
        result.write(self.tmpdir.name, non_empty="raise")
        reloaded = NoisySampleResult.load(self.tmpdir.name)
        for index in range(2):
            written_frame = result.samples[index].df
            loaded_frame = reloaded.samples[index].df
            self.assertDictEqual(
                written_frame.to_dict(), loaded_frame.to_dict()
            )

        kmeans = KmeansCluster()
        kmeans.set_kmeans_options(n_clusters=2)
        tester = NoisySampleStabilityTester()
        tester.run(result, cluster=kmeans)
def _data():
    """Return a ``Data`` object built from ``data/1d.sql``, located relative
    to the directory of this file."""
    return Data(Path(__file__).parent / "data" / "1d.sql")
Ejemplo n.º 12
0
 def setUp(self):
     """Store the data directory and file name on the test case and build
     ``self.d`` from ``data/1d_clustered.sql`` next to this file."""
     data_dir = Path(__file__).parent / "data"
     file_name = "1d_clustered.sql"
     self.ddir, self.dname = data_dir, file_name
     self.d = Data(data_dir / file_name)
Ejemplo n.º 13
0
 def setUp(self):
     """Store a 2x2 nested list in ``self.data`` and build ``self.d`` from
     ``data/test.sql`` next to this file."""
     self.data = [[100, 200], [400, 500]]
     sql_file = Path(__file__).parent / "data" / "test.sql"
     self.d = Data(sql_file)
Ejemplo n.º 14
0
 def setUp(self):
     """Build ``self.d`` from ``data/test_longer.sql`` next to this file."""
     self.d = Data(Path(__file__).parent / "data" / "test_longer.sql")