예제 #1
0
 def test_create_sparams_file(self, caplog):
     """Check that ``create_sparams_file()`` produces ``autoclass.s-params``.

     One "real location" dataset is added and prepared first; the
     resulting file is looked up relative to the current working directory.
     """
     data_path = os.path.join(here, dir_data, "sample-real-location.tsv")
     autoclass_input = wrapper.Input()
     autoclass_input.add_input_data(data_path, "real location")
     autoclass_input.prepare_input_data()
     autoclass_input.create_sparams_file()
     assert os.path.isfile("autoclass.s-params")
예제 #2
0
 def test_create_sparams_file_repro_run(self, caplog):
     """Check the ``autoclass.s-params`` file written for a reproducible run.

     After calling ``create_sparams_file(reproducible_run=True)`` the file
     must exist and contain the two settings asserted below.
     """
     clust = wrapper.Input()
     name = os.path.join(here, dir_data, "sample-real-location.tsv")
     clust.add_input_data(name, "real location")
     clust.prepare_input_data()
     clust.create_sparams_file(reproducible_run=True)
     assert os.path.isfile("autoclass.s-params")
     # Read through a context manager so the file handle is closed
     # deterministically instead of being left to the garbage collector.
     with open("autoclass.s-params", "r") as sparams_file:
         f_content = sparams_file.read()
     assert "randomize_random_p = false" in f_content
     assert 'start_fn_type = "block"' in f_content
예제 #3
0
 def test_create_hd2_file(self, caplog):
     """Check that ``create_hd2_file()`` produces ``autoclass.hd2``.

     Three sample datasets (real location, discrete, real scalar) are
     added in that order and prepared before the file is created.
     """
     samples = (
         ("sample-real-location.tsv", "real location"),
         ("sample-discrete.tsv", "discrete"),
         ("sample-real-scalar.tsv", "real scalar"),
     )
     autoclass_input = wrapper.Input()
     for file_name, data_type in samples:
         sample_path = os.path.join(here, dir_data, file_name)
         autoclass_input.add_input_data(sample_path, data_type)
     autoclass_input.prepare_input_data()
     autoclass_input.create_hd2_file()
     assert os.path.isfile("autoclass.hd2")
예제 #4
0
 def test_prepare_input_data(self, caplog):
     """Check the log message emitted by ``prepare_input_data()``.

     Three sample datasets (real location, discrete, real scalar) are
     added in that order; the captured log must then report the size of
     the final dataframe.
     """
     samples = (
         ("sample-real-location.tsv", "real location"),
         ("sample-discrete.tsv", "discrete"),
         ("sample-real-scalar.tsv", "real scalar"),
     )
     autoclass_input = wrapper.Input()
     for file_name, data_type in samples:
         sample_path = os.path.join(here, dir_data, file_name)
         autoclass_input.add_input_data(sample_path, data_type)
     autoclass_input.prepare_input_data()
     # Echo the captured log so it appears in pytest's output on failure.
     print(caplog.text)
     expected_message = "Final dataframe has 10 lines and 8 columns"
     assert expected_message in caplog.text
예제 #5
0
 def test_create_rparams_file(self):
     """Compare the generated ``autoclass.r-params`` with a reference copy.

     The file written by ``create_rparams_file()`` must exist and its
     content must equal that of the file with the same name stored in the
     ``test_data`` directory next to the tests.
     """
     filename = "autoclass.r-params"
     autoclass_input = wrapper.Input()
     autoclass_input.create_rparams_file()
     assert os.path.isfile(filename)
     reference_path = os.path.join(here, "test_data", filename)
     with open(reference_path, "r") as reference_file:
         expected = reference_file.read()
     with open(filename, "r") as generated_file:
         produced = generated_file.read()
     assert produced == expected
예제 #6
0
 def test_print_files(self):
     """Check that ``print_files()`` mentions every generated file.

     The hd2, model, s-params and r-params files are created in that
     order from a single "real location" dataset; the value returned by
     ``print_files()`` must contain each of the four file names.
     """
     data_path = os.path.join(here, dir_data, "sample-real-location.tsv")
     autoclass_input = wrapper.Input()
     autoclass_input.add_input_data(data_path, "real location")
     autoclass_input.prepare_input_data()
     autoclass_input.create_hd2_file()
     autoclass_input.create_model_file()
     autoclass_input.create_sparams_file()
     autoclass_input.create_rparams_file()
     content = autoclass_input.print_files()
     expected_names = (
         "autoclass.hd2",
         "autoclass.model",
         "autoclass.s-params",
         "autoclass.r-params",
     )
     for expected_name in expected_names:
         assert expected_name in content
예제 #7
0
 def test_init(self, tmp_dir):
     """Check the attribute values of a freshly constructed ``Input``.

     ``tmp_dir`` is requested as a fixture and is not used directly in
     the body.
     """
     clust = wrapper.Input()
     assert clust.root_name == "autoclass"
     assert clust.db2_missing_char == "?"
     assert clust.db2_separator_char == "\t"
     # Identity check: ``== False`` would also accept 0 and is flagged by
     # PEP 8 (E712); the attribute is expected to be the boolean False.
     assert clust.tolerate_error is False
예제 #8
0
 def test_add_input_data_dup_col_names(self, caplog):
     """Check that duplicate column names are reported in the log.

     Adding ``input-dup-col.tsv`` as "real location" data must leave the
     message asserted below in the captured log.
     """
     dup_col_path = os.path.join(here, dir_data, "input-dup-col.tsv")
     autoclass_input = wrapper.Input()
     autoclass_input.add_input_data(dup_col_path, "real location")
     expected_message = "Found duplicate column names"
     assert expected_message in caplog.text
예제 #9
0
 def test_add_input_data(self):
     """Call ``add_input_data()`` on a sample file with only the file path.

     NOTE(review): no assertion is visible in this excerpt, so the test may
     continue beyond what is shown here — confirm against the full file.
     """
     clust = wrapper.Input()
     name = os.path.join(here, dir_data, "sample-real-location.tsv")
     # The second argument that the sibling tests pass is omitted here.
     clust.add_input_data(name)