def test_compare_override_quoting():
    """Check that ``quote_choices=False`` leaves choice expressions unquoted.

    The choice values are SQL array literals; the generated quantities are
    expected to contain them verbatim after the ``@>`` operator.
    """
    raw_choices = {
        'one': "array['one'::varchar]",
        'two': "array['two'::varchar]",
    }
    comparison = Compare('col', '@>', raw_choices, [], {}, quote_choices=False)
    quantities = comparison.quantities

    assert len(quantities) == 2
    for expected_sql in (
        "col @> array['one'::varchar]",
        "col @> array['two'::varchar]",
    ):
        assert_contains(quantities.values(), expected_sql)
def test_compare_override_quoting():
    """Verify that choices are emitted verbatim when ``quote_choices=False``.

    Two pre-formatted SQL array literals are supplied as choice values and
    each must show up unmodified in the resulting quantities.
    """
    array_literals = {
        "one": "array['one'::varchar]",
        "two": "array['two'::varchar]",
    }
    quantities = Compare(
        "col",
        "@>",
        array_literals,
        [],
        {},
        quote_choices=False,
    ).quantities

    assert len(quantities) == 2

    expected_fragments = [
        "col @> array['one'::varchar]",
        "col @> array['two'::varchar]",
    ]
    for fragment in expected_fragments:
        assert_contains(quantities.values(), fragment)
def test_categorical_nones():
    """Check that ``Categorical`` handles a ``None`` choice consistently.

    Three spellings are compared:

    * ``d1`` - ``Categorical`` given a dict with an explicit ``'_NULL': None``
      entry;
    * ``d2`` - ``Compare`` with ``'='``, ``op_in_name=False`` and
      ``include_null=True``;
    * ``d3`` - ``Categorical`` given a plain list that contains ``None``.

    ``d1`` and ``d2`` must be equal as dicts; ``d1`` and ``d3`` must produce
    the same set of quantity values.
    """
    d1 = Categorical('col', {
        'vala': 'a',
        'valb': 'b',
        'valc': 'c',
        '_NULL': None,
    }, [], {}).quantities
    d2 = Compare('col', '=', {
        'vala': 'a',
        'valb': 'b',
        'valc': 'c',
    }, [], {}, op_in_name=False, include_null=True).quantities
    assert d1 == d2

    d3 = Categorical('col', ['a', 'b', 'c', None], [], {}).quantities
    # Compare against d3 (the list form). The previous assertion re-compared
    # d1 with d2, leaving d3 unused and the list-with-None path untested.
    assert sorted(d1.values()) == sorted(d3.values())
def test_categorical_same_as_compare():
    """Check that ``Categorical`` matches ``Compare`` with the ``'='`` operator.

    With default arguments only the quantity values are required to match;
    with ``op_in_name=True`` the ``Categorical`` quantities must equal the
    ``Compare`` quantities exactly.
    """
    base_choices = {"vala": "a", "valb": "b", "valc": "c"}

    # Each constructor gets its own copy of the choices mapping, mirroring the
    # separate dict literals of the original test.
    categorical_default = Categorical("col", dict(base_choices), [], {}).quantities
    compare_equals = Compare("col", "=", dict(base_choices), [], {}).quantities
    assert sorted(categorical_default.values()) == sorted(compare_equals.values())

    categorical_named = Categorical(
        "col",
        dict(base_choices),
        [],
        {},
        op_in_name=True,
    ).quantities
    assert compare_equals == categorical_named
def test_categorical_same_as_compare():
    """Verify ``Categorical`` is equivalent to an equality ``Compare``.

    The default ``Categorical`` must yield the same quantity values as
    ``Compare(..., '=')``; passing ``op_in_name=True`` must make the two
    quantity dicts identical.
    """

    def fresh_choices():
        # A new dict per call so no constructor shares state with another.
        return {'vala': 'a', 'valb': 'b', 'valc': 'c'}

    from_categorical = Categorical('col', fresh_choices(), [], {}).quantities
    from_compare = Compare('col', '=', fresh_choices(), [], {}).quantities
    assert sorted(from_categorical.values()) == sorted(from_compare.values())

    from_categorical_with_op = Categorical(
        'col', fresh_choices(), [], {}, op_in_name=True
    ).quantities
    assert from_compare == from_categorical_with_op
def _build_array_categoricals(self, categorical_config, impute_rules):
    """Build one ``Compare`` (operator ``@>``) per array-categorical entry.

    Args:
        categorical_config: iterable of mappings; each is read for its
            ``'column'`` and ``'metrics'`` keys and an optional
            ``'imputation'`` mapping.
        impute_rules: base imputation rules; per entry they are combined with
            ``coltype='array_categorical'`` and the entry's ``'imputation'``
            overrides.

    Returns:
        A list of ``Compare`` objects, in the order of ``categorical_config``.
    """
    # TODO: only include null flag where necessary
    comparisons = []
    for entry in categorical_config:
        column = entry['column']

        # Map every choice to an SQL array literal holding that single choice.
        array_choices = {}
        for choice in self._build_choices(entry):
            array_choices[choice] = "array['{}'::varchar]".format(choice)

        metrics = entry['metrics']
        entry_rules = dict(
            impute_rules,
            coltype='array_categorical',
            **entry.get('imputation', {})
        )

        comparisons.append(
            Compare(
                col=column,
                op='@>',
                choices=array_choices,
                function=metrics,
                impute_rules=entry_rules,
                op_in_name=False,
                quote_choices=False,
                include_null=True,
            )
        )
    return comparisons
def _build_array_categoricals(self, categorical_config, impute_rules):
    """Create the ``@>`` ``Compare`` objects for array-categorical columns.

    Args:
        categorical_config: iterable of mappings; each is read for its
            ``"column"`` and ``"metrics"`` keys, plus optional
            ``"imputation"`` and ``'coltype'`` entries.
        impute_rules: base imputation rules; per entry they are combined with
            ``coltype="array_categorical"`` and the entry's ``"imputation"``
            overrides.

    Returns:
        A list of ``Compare`` objects, in the order of ``categorical_config``.
    """
    # TODO: only include null flag where necessary
    built = []
    for spec in categorical_config:
        column_name = spec["column"]

        # Each choice becomes an SQL array literal containing just that choice.
        choice_literals = {}
        for choice in self._build_choices(spec):
            choice_literals[choice] = "array['{}'::varchar]".format(choice)

        metric_functions = spec["metrics"]
        merged_rules = dict(
            impute_rules,
            coltype="array_categorical",
            **spec.get("imputation", {})
        )
        declared_coltype = spec.get('coltype', None)

        built.append(
            Compare(
                col=column_name,
                op="@>",
                choices=choice_literals,
                function=metric_functions,
                impute_rules=merged_rules,
                op_in_name=False,
                quote_choices=False,
                include_null=True,
                coltype=declared_coltype,
            )
        )
    return built
def test_categorical_nones():
    """Check the ways of asking ``Categorical`` for a null category.

    A dict with an explicit ``"_NULL": None`` entry must equal an equality
    ``Compare`` built with ``op_in_name=False, include_null=True``, and a plain
    list containing ``None`` must yield the same quantity values.
    """
    with_null_key = Categorical(
        "col",
        {"vala": "a", "valb": "b", "valc": "c", "_NULL": None},
        [],
        {},
    ).quantities
    via_compare = Compare(
        "col",
        "=",
        {"vala": "a", "valb": "b", "valc": "c"},
        [],
        {},
        op_in_name=False,
        include_null=True,
    ).quantities
    assert with_null_key == via_compare

    from_list = Categorical("col", ["a", "b", "c", None], [], {}).quantities
    assert sorted(with_null_key.values()) == sorted(from_list.values())
def test_compare_dicts():
    """Exercise ``Compare`` with dict-valued choices.

    Three scenarios are checked: string choices with ``include_null=True``,
    numeric choices with a custom null name (``include_null="missing"``), and
    long keys/values truncated via ``maxlen=32``.
    """

    def check_distinct(result, expected_size):
        # Size matches and neither keys nor values collide.
        assert len(result) == expected_size
        assert len(set(result.values())) == len(result)
        assert len(set(result.keys())) == len(result)

    # --- string choices, default null name ---
    quantities = Compare(
        "col",
        "=",
        {"vala": "a", "valb": "b", "valc": "c"},
        [],
        {},
        include_null=True,
    ).quantities
    check_distinct(quantities, 4)
    for sql in ("col = 'a'", "col = 'b'", "col = 'c'"):
        assert_contains(quantities.values(), sql)
    for key in ("vala", "valb", "valc"):
        assert_contains(quantities.keys(), key)
    assert_contains(map(str.lower, quantities.keys()), "null")
    assert_contains(
        map(lambda value: value[0].lower(), quantities.values()), "col is null"
    )

    # --- numeric choices, custom null name ---
    quantities = Compare(
        "col",
        "<",
        {"val1": 1, "val2": 2, "val3": 3},
        [],
        {},
        include_null="missing",
    ).quantities
    check_distinct(quantities, 4)
    for sql in ("col < 1", "col < 2", "col < 3"):
        assert_contains(quantities.values(), sql)
    assert_contains(map(lambda value: value[0].lower(), quantities.values()), "null")
    for key in ("val1", "val2", "val3", "missing"):
        assert_contains(quantities.keys(), key)

    # --- long names limited by maxlen ---
    long_choices = {
        "really long string key that is similar to others":
            "really long string value that is similar to others",
        "really long string key that is like others":
            "really long string value that is like others",
        "different key":
            "really long string value that is quite alike to others",
        "ni":
            "really long string value that is also like everything else",
    }
    quantities = Compare(
        "long_column_name", "=", long_choices, [], {}, maxlen=32
    ).quantities
    check_distinct(quantities, 4)
    assert all(len(k) <= 32 for k in quantities.keys())
    assert_contains(quantities.keys(), "differ")
    for sql in (
        "long_column_name = 'really long string value that is similar to others'",
        "long_column_name = 'really long string value that is like others'",
        "long_column_name = 'really long string value that is quite alike to others'",
        "long_column_name = 'really long string value that is also like everything else'",
    ):
        assert_contains(quantities.values(), sql)
def test_compare_lists():
    """Exercise ``Compare`` with list-valued choices.

    Covers string choices with and without a null quantity, numeric choices
    with the ``>`` operator, and long values whose generated keys must stay
    within ``maxlen=32`` characters.
    """

    def check_distinct(result, expected_size):
        # Size matches and neither keys nor values collide.
        assert len(result) == expected_size
        assert len(set(result.values())) == len(result)
        assert len(set(result.keys())) == len(result)

    letter_sql = ("col = 'a'", "col = 'b'", "col = 'c'")

    # --- string choices plus a null quantity ---
    quantities = Compare(
        "col", "=", ["a", "b", "c"], [], {}, include_null=True
    ).quantities
    check_distinct(quantities, 4)
    for sql in letter_sql:
        assert_contains(quantities.values(), sql)
    assert_contains(
        map(lambda value: value[0].lower(), quantities.values()), "col is null"
    )

    # --- numeric choices, default arguments ---
    quantities = Compare("col", ">", [1, 2, 3], [], {}).quantities
    check_distinct(quantities, 3)
    for sql in ("col > 1", "col > 2", "col > 3"):
        assert_contains(quantities.values(), sql)

    # --- string choices, null explicitly disabled ---
    quantities = Compare(
        "col", "=", ["a", "b", "c"], [], {}, include_null=False
    ).quantities
    check_distinct(quantities, 3)
    for sql in letter_sql:
        assert_contains(quantities.values(), sql)

    # --- long values limited by maxlen ---
    long_values = [
        "really long string value that is similar to others",
        "really long string value that is like others",
        "really long string value that is quite alike to others",
        "really long string value that is also like everything else",
    ]
    quantities = Compare(
        "really_long_column_name", "=", long_values, [], {}, maxlen=32
    ).quantities
    check_distinct(quantities, 4)
    assert all(len(k) <= 32 for k in quantities.keys())
    for sql in (
        "really_long_column_name = 'really long string value that is similar to others'",
        "really_long_column_name = 'really long string value that is like others'",
        "really_long_column_name = 'really long string value that is quite alike to others'",
        "really_long_column_name = 'really long string value that is also like everything else'",
    ):
        assert_contains(quantities.values(), sql)
def test_compare_dicts():
    """Check ``Compare`` quantities when the choices are given as a dict.

    Scenarios: string choices with ``include_null=True``; numeric choices with
    the null quantity named via ``include_null='missing'``; and long
    keys/values with ``maxlen=32``.
    """
    # Scenario 1: string choices with the default null name.
    result = Compare(
        'col', '=', {'vala': 'a', 'valb': 'b', 'valc': 'c'}, [], {},
        include_null=True,
    ).quantities
    assert len(result) == 4
    assert len(set(result.values())) == len(result)
    assert len(set(result.keys())) == len(result)
    for expected_value in ["col = 'a'", "col = 'b'", "col = 'c'"]:
        assert_contains(result.values(), expected_value)
    for expected_key in ['vala', 'valb', 'valc']:
        assert_contains(result.keys(), expected_key)
    assert_contains(map(str.lower, result.keys()), 'null')
    assert_contains(map(lambda item: item[0].lower(), result.values()), "col is null")

    # Scenario 2: numeric choices with a custom null name.
    result = Compare(
        'col', '<', {'val1': 1, 'val2': 2, 'val3': 3}, [], {},
        include_null='missing',
    ).quantities
    assert len(result) == 4
    assert len(set(result.values())) == len(result)
    assert len(set(result.keys())) == len(result)
    for expected_value in ["col < 1", "col < 2", "col < 3"]:
        assert_contains(result.values(), expected_value)
    assert_contains(map(lambda item: item[0].lower(), result.values()), "null")
    for expected_key in ['val1', 'val2', 'val3', 'missing']:
        assert_contains(result.keys(), expected_key)

    # Scenario 3: long keys and values with a maximum name length.
    verbose_choices = {
        'really long string key that is similar to others':
            'really long string value that is similar to others',
        'really long string key that is like others':
            'really long string value that is like others',
        'different key':
            'really long string value that is quite alike to others',
        'ni':
            'really long string value that is also like everything else',
    }
    result = Compare(
        'long_column_name', '=', verbose_choices, [], {}, maxlen=32
    ).quantities
    assert len(result) == 4
    assert len(set(result.values())) == len(result)
    assert len(set(result.keys())) == len(result)
    assert all(len(k) <= 32 for k in result.keys())
    assert_contains(result.keys(), 'differ')
    for expected_value in [
        "long_column_name = 'really long string value that is similar to others'",
        "long_column_name = 'really long string value that is like others'",
        "long_column_name = 'really long string value that is quite alike to others'",
        "long_column_name = 'really long string value that is also like everything else'",
    ]:
        assert_contains(result.values(), expected_value)