def varying_between(table, idvarlist):
    """
    Return the variables whose values are not constant inside the groups
    defined by `idvarlist`.

    Rows of `table` are grouped with `group_table_indices(table, idvarlist)`.
    A variable is reported as soon as a single group holds more than one
    distinct value for it.

    :param table: Data table; `table.domain.variables` and
        `table.domain.metas` supply the candidate variables.
    :param idvarlist: Variables defining the groups; they are excluded from
        the candidates and therefore never reported.
    :return: List of the varying variables, ordered as they appear in
        `table.domain.variables + table.domain.metas`.
    """
    excluded = set(idvarlist)
    all_possible = [
        var for var in table.domain.variables + table.domain.metas
        if var not in excluded
    ]
    candidate_set = set(all_possible)
    idmap = group_table_indices(table, idvarlist)

    varying = set()
    for indices in idmap.values():
        if not candidate_set:
            # Every candidate is already known to vary; the remaining
            # groups cannot change the result.
            break
        subset = table[indices]
        # Iterate over a snapshot because candidates are removed below.
        for var in list(candidate_set):
            # Only the column itself is needed; the second element of the
            # returned pair is ignored.
            values, _ = subset.get_column_view(var)
            if isinstance(var, Orange.data.StringVariable):
                # String columns are compared by plain distinct values.
                uniq = set(values)
            else:
                # NOTE(review): presumably drops NaN (missing) entries
                # before collecting distinct values -- confirm in helper.
                uniq = unique_non_nan(values)
            if len(uniq) > 1:
                varying.add(var)
                candidate_set.remove(var)

    # Precompute each variable's first position once instead of calling
    # list.index for every sort key.
    position = {}
    for index, var in enumerate(all_possible):
        position.setdefault(var, index)
    return sorted(varying, key=position.__getitem__)
def varying_between(table, idvarlist):
    """
    Return the variables whose values are not constant inside the groups
    defined by `idvarlist`.

    Rows of `table` are grouped with `group_table_indices(table, idvarlist)`.
    A variable is reported as soon as a single group holds more than one
    distinct value for it.

    :param table: Data table; `table.domain.variables` and
        `table.domain.metas` supply the candidate variables.
    :param idvarlist: Variables defining the groups; they are excluded from
        the candidates and therefore never reported.
    :return: List of the varying variables, ordered as they appear in
        `table.domain.variables + table.domain.metas`.
    """
    excluded = set(idvarlist)
    all_possible = [
        var for var in table.domain.variables + table.domain.metas
        if var not in excluded
    ]
    candidate_set = set(all_possible)
    idmap = group_table_indices(table, idvarlist)

    varying = set()
    for indices in idmap.values():
        if not candidate_set:
            # Every candidate is already known to vary; the remaining
            # groups cannot change the result.
            break
        subset = table[indices]
        # Iterate over a snapshot because candidates are removed below.
        for var in list(candidate_set):
            # Only the column itself is needed; the second element of the
            # returned pair is ignored.
            values, _ = subset.get_column_view(var)
            if var.is_string:
                # String columns are compared by plain distinct values.
                uniq = set(values)
            else:
                # NOTE(review): presumably drops NaN (missing) entries
                # before collecting distinct values -- confirm in helper.
                uniq = unique_non_nan(values)
            if len(uniq) > 1:
                varying.add(var)
                candidate_set.remove(var)

    # Precompute each variable's first position once instead of calling
    # list.index for every sort key.
    position = {}
    for index, var in enumerate(all_possible):
        position.setdefault(var, index)
    return sorted(varying, key=position.__getitem__)
def test_group_table_indices(self):
    # Grouping "test9.tab" by the single variable "g" must map every
    # one-element key tuple to the row indices listed below.
    data = Table("test9.tab")
    expected = defaultdict(
        list,
        {
            ("1",): [0, 1],
            ("huh",): [2],
            ("hoy",): [3],
            ("?",): [4],
            ("2",): [5],
            ("oh yeah",): [6],
            ("3",): [7],
        },
    )
    grouped = group_table_indices(data, ["g"])
    self.assertEqual(expected, grouped)
def test_group_table_indices(self):
    # Grouping the "test9.tab" fixture by the single variable "g" must map
    # every one-element key tuple to the row indices listed below.
    data = Table(test_filename("test9.tab"))
    expected = defaultdict(
        list,
        {
            ("1",): [0, 1],
            ("huh",): [2],
            ("hoy",): [3],
            ("?",): [4],
            ("2",): [5],
            ("oh yeah",): [6],
            ("3",): [7],
        },
    )
    grouped = group_table_indices(data, ["g"])
    self.assertEqual(expected, grouped)
def test_group_table_indices(self):
    # Grouping the "test9.tab" fixture by "g" (passed as a bare name) must
    # map every plain string key to the row indices listed below.
    data = Table(test_filename("test9.tab"))
    expected = defaultdict(
        list,
        {
            "1": [0, 1],
            "huh": [2],
            "hoy": [3],
            "?": [4],
            "2": [5],
            "oh yeah": [6],
            "3": [7],
        },
    )
    grouped = group_table_indices(data, "g")
    self.assertEqual(expected, grouped)