Beispiel #1
0
    def test_add_element(self):
        """Growing the pipeline with add_element must not change the
        outputs of stages that were already parametrized."""
        # build an empty pipeline, deferring parametrization
        pipe = api.pipeline([], run=False)
        # first stage: a trajectory reader
        src = api.source(self.traj_files, top=self.pdb_file)
        pipe.add_element(src)
        pipe.parametrize()
        # capture the reader output right away
        reader_out_before = src.get_output()

        # second stage: k-means clustering
        km = api.cluster_kmeans(k=15)
        pipe.add_element(km)
        pipe.parametrize()
        # capture the clustering output right away
        kmeans_out_before = km.get_output()

        # re-read the reader output, then grow the pipeline once more
        reader_out_after = src.get_output()
        pipe.add_element(api.kmeans(k=2))
        pipe.parametrize()

        # re-read the clustering output after the pipeline grew
        kmeans_out_after = km.get_output()
        # intermediate results must be unaffected by add_element
        np.testing.assert_array_equal(reader_out_before[0], reader_out_after[0])
        np.testing.assert_array_equal(reader_out_before[1], reader_out_after[1])
        np.testing.assert_array_equal(kmeans_out_before[0], kmeans_out_after[0])
        np.testing.assert_array_equal(kmeans_out_before[1], kmeans_out_after[1])
Beispiel #2
0
 def test_np_reader_in_pipeline(self):
     """A .npy file on disk can feed a strided, chunked pipeline."""
     with TemporaryDirectory() as tmp:
         path = os.path.join(tmp, "test.npy")
         # write a small random array for the reader to pick up
         np.save(path, np.random.random((100, 3)))
         npy_reader = api.source(path)
         pipe = api.pipeline(npy_reader, run=False, stride=2, chunksize=5)
         pipe.parametrize()
Beispiel #3
0
 def test_chunksize(self):
     """The chunksize passed to the pipeline propagates to every element."""
     src = api.source(self.traj_files, top=self.pdb_file)
     expected = 1001
     pipe = api.pipeline([src, api.tica(), api.cluster_mini_batch_kmeans()],
                         chunksize=expected)
     assert pipe.chunksize == expected
     # every chained element must have inherited the same chunksize
     assert all(elem.chunksize == expected for elem in pipe._chain)
Beispiel #4
0
 def test_np_reader_in_pipeline(self):
     """A .npy file source stays parametrized before and after parametrize().

     Fix: the original used NamedTemporaryFile(delete=False) and never
     removed the file, leaking a temp file per run; it also wrote to the
     file while the handle was still open, which fails on Windows. We now
     close the handle first and unlink the file in a finally block.
     """
     import os
     # create a unique path, then close the handle before reusing the name
     with tempfile.NamedTemporaryFile(suffix='.npy', delete=False) as f:
         file_name = f.name
     try:
         data = np.random.random((100, 3))
         np.save(file_name, data)
         reader = api.source(file_name)
         p = api.pipeline(reader, run=False, stride=2, chunksize=5)
         assert reader._parametrized
         p.parametrize()
         assert reader._parametrized
     finally:
         # never leak the temporary file
         os.unlink(file_name)
Beispiel #5
0
 def test_no_transform(self):
     """Each clustering method can consume the reader directly,
     with no transform stage in between."""
     src = api.source(self.traj_files, top=self.pdb_file)
     for clustering in (api.cluster_kmeans(k=10),
                        api.cluster_regspace(dmin=10),
                        api.cluster_uniform_time()):
         # last chain element is the clustering; it must produce output
         api.pipeline([src, clustering])._chain[-1].get_output()
Beispiel #6
0
 def test_set_element(self):
     """set_element must invalidate parametrization and swap the stage."""
     src = api.source(self.traj_files, top=self.pdb_file)
     pca_est = api.pca()
     pipe = api.pipeline([src, pca_est])
     self.assertTrue(pipe._is_parametrized())
     out_pca = pca_est.get_output()
     tica_est = api.tica(lag=self.generated_lag)
     # swap pca out for tica at position 1
     pipe.set_element(1, tica_est)
     self.assertFalse(pipe._is_parametrized(), "After replacing an element, the pipeline should not be parametrized.")
     pipe.parametrize()
     out_tica = tica_est.get_output()
     # the replacement must actually have taken effect
     self.assertFalse(np.array_equal(out_pca[0], out_tica[0]),
                      "The output should not be the same when the method got replaced.")
Beispiel #7
0
 def test_is_parametrized(self):
     """_is_parametrized flips from False to True once parametrize() runs."""
     # chain every available transformer/clustering type at once
     stages = [
         api.source(self.traj_files, top=self.pdb_file),
         api.tica(),
         api.pca(),
         api.cluster_kmeans(k=50),
         api.cluster_regspace(dmin=50),
         api.cluster_uniform_time(k=20),
     ]
     pipe = api.pipeline(stages, run=False)
     self.assertFalse(pipe._is_parametrized(), "If run=false, the pipeline should not be parametrized.")
     pipe.parametrize()
     self.assertTrue(pipe._is_parametrized(), "If parametrized was called, the pipeline should be parametrized.")
Beispiel #8
0
    def test_replace_data_source(self):
        """Swapping the data source must change the clustering output.

        Fix: the original contained `print reader_gen` — a Python 2 print
        statement, which is a SyntaxError under Python 3. It was a debug
        leftover and has been removed.
        """
        reader_xtc = api.source(self.traj_files, top=self.pdb_file)
        reader_gen = DataInMemory(data=self.generated_data)

        kmeans = api.kmeans(k=10)
        assert hasattr(kmeans, '_chunks')
        p = api.pipeline([reader_xtc, kmeans])
        out1 = kmeans.get_output()
        # replace source
        p.set_element(0, reader_gen)
        assert hasattr(kmeans, '_chunks')
        p.parametrize()
        out2 = kmeans.get_output()
        self.assertFalse(np.array_equal(out1, out2), "Data source changed, so should the resulting clusters.")
Beispiel #9
0
 def test_no_cluster(self):
     """Pipelines without a clustering stage still yield output."""
     src = api.source(self.traj_files, top=self.pdb_file)
     # a bare reader is a valid pipeline on its own
     api.pipeline(src)
     src.get_output()
     # reader followed by a single decomposition step (tica, then pca)
     for transform in (api.tica(), api.pca()):
         api.pipeline([src, transform])._chain[-1].get_output()
Beispiel #10
0
 def test_no_transform(self):
     """Clustering works on raw reader output, no intermediate transform."""
     reader = api.source(self.traj_files, top=self.pdb_file)
     clusterings = [api.cluster_kmeans(k=10),
                    api.cluster_regspace(dmin=10),
                    api.cluster_uniform_time()]
     for clustering in clusterings:
         # the final chain element must be able to produce output
         api.pipeline([reader, clustering])._chain[-1].get_output()