def test_mixed_schema(self):
    # Two records with different key sets: 'a' is present in both, while 'b'
    # and 'c' each appear in only one, so the expected frame has NaN in the
    # positions where a record lacks the key.
    records = [{'a': 1.0, 'b': 2.0}, {'a': 3.0, 'c': 4.0}]
    result = from_object(records).flatten(
        name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    expected = {'a': [1, 3], 'b': [2, np.nan], 'c': [np.nan, 4]}
    self.df_equality(expected, result)
def test_simple_flatten(self):
    # A dict of two SimpleObject wrappers; with CONCATENATE_ALWAYS the
    # expected column names are '<dict key>_value'.
    root = from_object({'a': SimpleObject(1), 'b': SimpleObject(2)})
    result = root.flatten(name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    expected = {'a_value': [1], 'b_value': [2]}
    self.df_equality(expected, result)
def test_null_list(self):
    # The second inner record carries None where the first carries a list.
    # The expected frame holds only the three rows produced by the first
    # record (the 3.0 stored under the null record's 'c' never appears).
    with_list = {'b': [1.0, 1.0, 5.0], 'c': 2.0}
    without_list = {'b': None, 'c': 3.0}
    root = {'d': [with_list, without_list], 'e': 4.0}
    result = from_object(root).flatten(
        name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    expected = {'d_b': [1, 1, 5], 'd_c': [2, 2, 2], 'e': [4, 4, 4]}
    self.df_equality(expected, result)
def test_null_primitive(self):
    # A None inside a list of floats is expected to come out as NaN, and the
    # scalar 'e' is expected once per flattened row.
    values = [1.0, 1.0, None]
    record = {'b': values}
    # The same record object is deliberately listed twice.
    root = {'d': [record, record], 'e': 2.0}
    result = from_object(root).flatten(
        name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    expected = {
        'd_b': [1, 1, np.nan, 1, 1, np.nan],
        'e': [2, 2, 2, 2, 2, 2],
    }
    self.df_equality(expected, result)
def test_flatten_multiple_lists(self):
    # Two sibling lists next to a scalar: a default flatten() must raise
    # ValueError, while FLATTEN_AVAILABLE is expected to yield only the
    # scalar column.
    root = from_object({'a': [1, 2], 'b': [3, 4], 'c': 1})
    with self.assertRaises(ValueError):
        root.flatten()
    result = root.flatten(flatten_strategy=FlattenStrategy.FLATTEN_AVAILABLE)
    self.df_equality({'c': [1]}, result)
def test_flatten_list(self):
    # A ListObject of two SimpleObjects (one wrapping None) beside a scalar,
    # flattened with the default arguments.
    items = ListObject([SimpleObject(1.0), SimpleObject(None)])
    root = from_object({'a': items, 'b': 3})
    result = root.flatten()
    expected = {'value': [1, np.nan], 'b': [3, 3]}
    self.df_equality(expected, result)
def test_flatten_nested_list(self):
    # A ListObject containing the same inner ListObject twice; the expected
    # frame has one row per innermost element (2 x 2 = 4 rows).
    inner = ListObject([SimpleObject(1.0), SimpleObject(None)])
    outer = ListObject([inner, inner])
    root = from_object({'a': outer, 'b': 3})
    result = root.flatten(name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    expected = {
        'a_values_values_value': [1, np.nan, 1, np.nan],
        'b': [3, 3, 3, 3],
    }
    self.df_equality(expected, result)
def test_attr(self):
    # Navigate from the root node to a nested node via attribute access and
    # flatten starting from that nested node.
    wrapped = SimpleObject(ListObject([SimpleObject(1.0), SimpleObject(None)]))
    root = from_object(wrapped)
    leaf = root.value.values.value
    result = leaf.flatten()
    self.df_equality({'value': [1, np.nan]}, result)
def test_flatten_null(self):
    # This demonstrates that when the schema of a field cannot be determined
    # (here 'b' wraps None), the field is dropped entirely. Open question:
    # for data formats where the schema is known even when there is no data,
    # should there be a way to fill out an empty node? That presumably
    # depends on the converter, since it can create the necessary "empty"
    # nodes (though it must fill out a primitive node at the end).
    populated = SimpleObject(1)
    empty = SimpleObject(None)
    root = from_object({'a': populated, 'b': empty})
    result = root.flatten(name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    self.df_equality({'a_value': [1]}, result)
def test_str(self):
    # str(node) is compared against per-child renderings joined in the
    # node's own child iteration order.
    record = {'b': [1.0, 1.0, 1.0]}
    root = {'d': [record, record], 'e': 2.0}
    node = from_object(root)
    rendered_by_child = {
        'd': '- d []\n - b []float64',
        'e': '- e float64',
    }
    expected = '\n'.join(rendered_by_child[name] for name in node._children)
    actual = str(node)
    self.assertEqual(expected, actual)
def test_prim_only(self):
    # A bare list of primitives; the expected frame has a single column
    # keyed by None.
    values = [1, 2, 3]
    result = from_object(values).flatten()
    self.df_equality({None: values}, result)
def test_incomplete_node(self):
    # An empty list as input is expected to flatten to an empty frame.
    empty_node = from_object([])
    result = empty_node.flatten()
    self.df_equality({}, result)
def test_excluded_lists_are_ignored(self):
    # Each record has two sibling lists ('b' and 'c'); restricting the
    # flatten with `include` is expected to succeed for either selection.
    records = [
        {'a': None, 'b': [2, 3], 'c': [-2, -3]},
        {'a': 1, 'b': [2, 4], 'c': [-2, -4]},
    ]
    root = from_object(records)

    # NOTE(review): the first record has 'a': None yet 0 is expected here --
    # confirm how flatten/df_equality treat the missing value.
    only_a = root.flatten(include=['a'])
    self.df_equality({'a': [0, 1]}, only_a)

    only_b = root.flatten(include=['b'])
    self.df_equality({'b': [2, 3, 2, 4]}, only_b)
def test_multi_index_naming(self):
    # With MULTI_INDEX the expected column keys are tuples holding the path
    # to each value.
    root = from_object({'a': {'c': 1}, 'b': 2})
    result = root.flatten(name_strategy=NameStrategy.MULTI_INDEX)
    expected = {('a', 'c'): [1], ('b',): [2]}
    self.df_equality(expected, result)
def test_conflicting_clusions(self):
    # Naming the same field in both `include` and `exclude` must raise
    # AssertionError.
    root = from_object({'a': 1, 'b': 2})
    with self.assertRaises(AssertionError):
        root.flatten(include={'a'}, exclude={'a'})
def test_exclude(self):
    # Excluding 'b' is expected to leave only column 'a'.
    root = from_object({'a': 1, 'b': 2})
    result = root.flatten(exclude={'b'})
    expected = {'a': [1]}
    self.df_equality(expected, result)
def test_strings(self):
    # A SimpleObject wrapping a string value.
    # NOTE(review): the expected value is the bare string 'a' rather than a
    # list like the other tests use -- confirm df_equality accepts a scalar.
    root = from_object(SimpleObject('a'))
    result = root.flatten(name_strategy=NameStrategy.CONCATENATE_ALWAYS)
    self.df_equality({'value': 'a'}, result)