def test_venn_diagram(self):
    """Concatenate three overlapping zoo row windows and reshape them wide.

    For each source, a three-row window of the zoo table (shifted by the
    source's position) receives three extra meta columns through
    ``self.add_metas``: a random class value, the source name and an item
    id copied from ``table.metas[row, 0]``.  The windows are concatenated,
    reshaped wide on the item id, and the item id column is dropped.  The
    resulting metas must match the expected matrix, with NaN wherever a
    source has no row for an item.
    """
    sources = ["SVM Learner", "Naive Bayes", "Random Forest"]
    item_id_var = StringVariable("item_id")
    source_var = StringVariable("source")
    table = Table("zoo")
    class_var = table.domain.class_var
    # One random class index per (row within window, source).
    cv = np.random.randint(len(class_var.values), size=(3, len(sources)))

    tables = []
    for shift, source in enumerate(sources):
        rows = [shift, shift + 1, shift + 2]
        window = Table.from_table(table.domain, table, rows)
        extra_vars = (
            DiscreteVariable(
                "%s(%s)" % (class_var.name, source), class_var.values
            ),
            source_var,
            item_id_var,
        )
        extra_metas = np.array(
            [
                [cv[k, shift], source, table.metas[row, 0]]
                for k, row in enumerate(rows)
            ],
            dtype=object,
        )
        tables.append(self.add_metas(window, extra_vars, extra_metas))

    data = table_concat(tables)
    varying = varying_between(data, item_id_var)
    if source_var in varying:
        varying.remove(source_var)
    data = reshape_wide(data, varying, [item_id_var], [source_var])
    data = drop_columns(data, [item_id_var])

    nan = np.nan
    result = np.array(
        [
            [table.metas[0, 0], cv[0, 0], nan, nan],
            [table.metas[1, 0], cv[1, 0], cv[0, 1], nan],
            [table.metas[2, 0], cv[2, 0], cv[1, 1], cv[0, 2]],
            [table.metas[3, 0], nan, cv[2, 1], cv[1, 2]],
            [table.metas[4, 0], nan, nan, cv[2, 2]],
        ],
        dtype=object,
    )

    for i, expected_row in enumerate(result):
        for j, expected in enumerate(expected_row):
            actual = data.metas[i][j]
            # NaN never compares equal to itself, so missing cells are
            # checked with isnan instead of an equality assertion.
            if isinstance(expected, float) and np.isnan(expected):
                self.assertTrue(np.isnan(actual))
            else:
                np.testing.assert_equal(actual, expected)
def test_venn_diagram(self):
    """Check the concat / reshape_wide / drop_columns pipeline on zoo data.

    Three tables are built, one per source.  Table ``s`` holds zoo rows
    ``s``, ``s + 1`` and ``s + 2`` and gets three extra metas from
    ``self.add_metas``: a random class value, the source name and an item
    id taken from ``table.metas[row, 0]``.  After concatenating, reshaping
    wide on the item id and dropping the item id column, the metas of row
    ``r`` are expected to be ``table.metas[r, 0]`` followed by one value
    per source, NaN where that source's window does not contain row ``r``.
    """
    window_len = 3
    sources = ["SVM Learner", "Naive Bayes", "Random Forest"]
    item_id_var = StringVariable("item_id")
    source_var = StringVariable("source")
    table = Table("zoo")
    class_var = table.domain.class_var
    cv = np.random.randint(len(class_var.values), size=(3, len(sources)))

    tables = []
    for col, source in enumerate(sources):
        row_ids = [col + k for k in range(window_len)]
        subset = Table.from_table(table.domain, table, row_ids)
        value_var = DiscreteVariable(
            "%s(%s)" % (class_var.name, source), class_var.values
        )
        meta_rows = [
            [cv[k, col], source, table.metas[col + k, 0]]
            for k in range(window_len)
        ]
        subset = self.add_metas(
            subset,
            (value_var, source_var, item_id_var),
            np.array(meta_rows, dtype=object),
        )
        tables.append(subset)

    data = table_concat(tables)
    varying = varying_between(data, item_id_var)
    if source_var in varying:
        varying.remove(source_var)
    data = reshape_wide(data, varying, [item_id_var], [source_var])
    data = drop_columns(data, [item_id_var])

    # Source ``s`` covers rows s .. s + 2, so row ``r`` is the (r - s)-th row
    # of that window when 0 <= r - s < 3, and is absent (NaN) otherwise.
    n_items = window_len + len(sources) - 1
    result = np.array(
        [
            [table.metas[r, 0]]
            + [
                cv[r - s, s] if 0 <= r - s < window_len else np.nan
                for s in range(len(sources))
            ]
            for r in range(n_items)
        ],
        dtype=object,
    )

    for (i, j), val in np.ndenumerate(result):
        cell = data.metas[i][j]
        expect_missing = isinstance(val, float) and np.isnan(val)
        if not expect_missing:
            np.testing.assert_equal(cell, val)
            continue
        self.assertTrue(np.isnan(cell))