def test_create_sparams_file(self, caplog):
    """Check that ``create_sparams_file`` produces ``autoclass.s-params``.

    A single "real location" dataset is loaded and prepared before the
    search-parameters file is written; the file is looked up by relative
    path, i.e. in the current working directory.
    """
    data_path = os.path.join(here, dir_data, "sample-real-location.tsv")

    autoclass_input = wrapper.Input()
    autoclass_input.add_input_data(data_path, "real location")
    autoclass_input.prepare_input_data()
    autoclass_input.create_sparams_file()

    assert os.path.isfile("autoclass.s-params")
def test_create_sparams_file_repro_run(self, caplog):
    """Check the s-params file written when ``reproducible_run=True``.

    Besides existing on disk, the generated ``autoclass.s-params`` file
    must contain the two settings asserted below.
    """
    clust = wrapper.Input()
    name = os.path.join(here, dir_data, "sample-real-location.tsv")
    clust.add_input_data(name, "real location")
    clust.prepare_input_data()
    clust.create_sparams_file(reproducible_run=True)
    assert os.path.isfile("autoclass.s-params")
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("autoclass.s-params", "r") as sparams_file:
        f_content = sparams_file.read()
    assert "randomize_random_p = false" in f_content
    assert 'start_fn_type = "block"' in f_content
def test_create_hd2_file(self, caplog):
    """Check that ``create_hd2_file`` produces ``autoclass.hd2``.

    Three datasets (real location, discrete, real scalar) are added and
    prepared before the header file is written.
    """
    # (file name, data type) pairs, registered in this order.
    datasets = (
        ("sample-real-location.tsv", "real location"),
        ("sample-discrete.tsv", "discrete"),
        ("sample-real-scalar.tsv", "real scalar"),
    )

    autoclass_input = wrapper.Input()
    for file_name, data_type in datasets:
        data_path = os.path.join(here, dir_data, file_name)
        autoclass_input.add_input_data(data_path, data_type)
    autoclass_input.prepare_input_data()
    autoclass_input.create_hd2_file()

    assert os.path.isfile("autoclass.hd2")
def test_prepare_input_data(self, caplog):
    """Check the summary message logged by ``prepare_input_data``.

    Three datasets (real location, discrete, real scalar) are added; the
    captured log must then report the size of the final dataframe.
    """
    # (file name, data type) pairs, registered in this order.
    datasets = (
        ("sample-real-location.tsv", "real location"),
        ("sample-discrete.tsv", "discrete"),
        ("sample-real-scalar.tsv", "real scalar"),
    )

    autoclass_input = wrapper.Input()
    for file_name, data_type in datasets:
        data_path = os.path.join(here, dir_data, file_name)
        autoclass_input.add_input_data(data_path, data_type)
    autoclass_input.prepare_input_data()

    # Echo the captured log so it shows up in pytest's output.
    print(caplog.text)
    assert "Final dataframe has 10 lines and 8 columns" in caplog.text
def test_create_rparams_file(self):
    """Check ``create_rparams_file`` output against the reference copy.

    The generated ``autoclass.r-params`` must exist and its text must be
    identical to the file of the same name stored under ``test_data``.
    """
    filename = "autoclass.r-params"

    autoclass_input = wrapper.Input()
    autoclass_input.create_rparams_file()
    assert os.path.isfile(filename)

    reference_path = os.path.join(here, "test_data", filename)
    with open(reference_path, "r") as reference_file:
        expected = reference_file.read()
    with open(filename, "r") as generated_file:
        produced = generated_file.read()

    assert produced == expected
def test_print_files(self):
    """Check that ``print_files`` mentions every generated file.

    The hd2, model, s-params and r-params files are created first; the
    value returned by ``print_files`` must then contain each file name.
    """
    data_path = os.path.join(here, dir_data, "sample-real-location.tsv")

    autoclass_input = wrapper.Input()
    autoclass_input.add_input_data(data_path, "real location")
    autoclass_input.prepare_input_data()
    autoclass_input.create_hd2_file()
    autoclass_input.create_model_file()
    autoclass_input.create_sparams_file()
    autoclass_input.create_rparams_file()

    content = autoclass_input.print_files()
    expected_names = (
        "autoclass.hd2",
        "autoclass.model",
        "autoclass.s-params",
        "autoclass.r-params",
    )
    for expected_name in expected_names:
        assert expected_name in content
def test_init(self, tmp_dir):
    """Check the attribute values of a freshly constructed ``Input``."""
    fresh_input = wrapper.Input()

    assert fresh_input.root_name == "autoclass"
    assert fresh_input.db2_missing_char == "?"
    assert fresh_input.db2_separator_char == "\t"
    assert fresh_input.tolerate_error == False
def test_add_input_data_dup_col_names(self, caplog):
    """Check that adding ``input-dup-col.tsv`` logs a duplicate-column message."""
    data_path = os.path.join(here, dir_data, "input-dup-col.tsv")

    autoclass_input = wrapper.Input()
    autoclass_input.add_input_data(data_path, "real location")

    assert "Found duplicate column names" in caplog.text
def test_add_input_data(self):
    """Call ``add_input_data`` with only a file path.

    No assertion is made in the visible code; the test passes as long as
    the call does not raise.
    """
    data_path = os.path.join(here, dir_data, "sample-real-location.tsv")
    autoclass_input = wrapper.Input()
    autoclass_input.add_input_data(data_path)