def test_diff_records_str_values(self):
    """Diff string-valued records keyed on ``name`` and round-trip the patch."""
    before = [
        {'name': 'a', 'sheep': '7'},
        {'name': 'b', 'sheep': '12'},
        {'name': 'c', 'sheep': '0'},
    ]
    after = [
        {'name': 'a', 'sheep': '7'},
        {'name': 'c', 'sheep': '2'},
        {'name': 'd', 'sheep': '8'},
    ]
    expected_added = [{'name': 'd', 'sheep': '8'}]
    expected_removed = [{'name': 'b', 'sheep': '12'}]
    expected_changed = [
        {'key': ['c'], 'fields': {'sheep': {'from': '0', 'to': '2'}}},
    ]

    delta = csvdiff.diff_records(before, after, ['name'])

    # The result must validate as a patch and be reported as untyped.
    assert patch.is_valid(delta)
    assert not patch.is_typed(delta)

    # Each section of the diff holds exactly one expected entry.
    self.assertEqual(delta['added'], expected_added)
    self.assertEqual(delta['removed'], expected_removed)
    self.assertEqual(delta['changed'], expected_changed)

    # Applying the diff to the original records must yield the new records.
    rebuilt = csvdiff.patch_records(delta, before)
    self.assertRecordsEqual(after, rebuilt)
def test_diff_records_multikey(self):
    """Diff integer-valued records on the composite key ``(name, type)``."""
    before = [
        {'name': 'a', 'type': 1, 'sheep': 7},
        {'name': 'b', 'type': 1, 'sheep': 12},
        {'name': 'c', 'type': 1, 'sheep': 0},
    ]
    after = [
        {'name': 'a', 'type': 1, 'sheep': 7},
        {'name': 'c', 'type': 1, 'sheep': 2},
        {'name': 'd', 'type': 1, 'sheep': 8},
    ]
    key_fields = ['name', 'type']
    expected_added = [{'name': 'd', 'sheep': 8, 'type': 1}]
    expected_removed = [{'name': 'b', 'sheep': 12, 'type': 1}]
    expected_changed = [
        {'key': ['c', 1], 'fields': {'sheep': {'from': 0, 'to': 2}}},
    ]

    delta = csvdiff.diff_records(before, after, key_fields)

    # The result must validate as a patch and be reported as typed.
    assert patch.is_valid(delta)
    assert patch.is_typed(delta)

    self.assertEqual(delta['added'], expected_added)
    self.assertEqual(delta['removed'], expected_removed)
    self.assertEqual(delta['changed'], expected_changed)

    # Applying the diff to the original records must yield the new records.
    rebuilt = csvdiff.patch_records(delta, before)
    self.assertRecordsEqual(after, rebuilt)
def test_diff_records_multikey(self):
    """Check diffing and patching of records keyed on both ``name`` and ``type``."""

    def row(name, sheep):
        # Every fixture row shares type 1; only name and sheep vary.
        return {'name': name, 'type': 1, 'sheep': sheep}

    old_rows = [row('a', 7), row('b', 12), row('c', 0)]
    new_rows = [row('a', 7), row('c', 2), row('d', 8)]

    result = csvdiff.diff_records(old_rows, new_rows, ['name', 'type'])

    assert patch.is_valid(result)
    assert patch.is_typed(result)

    # One row added, one removed, one changed in its 'sheep' field.
    self.assertEqual(result['added'], [{'name': 'd', 'sheep': 8, 'type': 1}])
    self.assertEqual(result['removed'], [{'name': 'b', 'sheep': 12, 'type': 1}])
    self.assertEqual(
        result['changed'],
        [{'key': ['c', 1], 'fields': {'sheep': {'from': 0, 'to': 2}}}],
    )

    # Patching the old rows with the diff must reproduce the new rows.
    patched_rows = csvdiff.patch_records(result, old_rows)
    self.assertRecordsEqual(new_rows, patched_rows)
def test_diff_records_multikey(self):
    """Diff two mappings of records indexed by ``(name, type)`` tuples."""
    old_rows = [
        {'name': 'a', 'type': 1, 'sheep': 7},
        {'name': 'b', 'type': 1, 'sheep': 12},
        {'name': 'c', 'type': 1, 'sheep': 0},
    ]
    new_rows = [
        {'name': 'a', 'type': 1, 'sheep': 7},
        {'name': 'c', 'type': 1, 'sheep': 2},
        {'name': 'd', 'type': 1, 'sheep': 8},
    ]
    # Index each record by its (name, type) pair.
    before = {(r['name'], r['type']): r for r in old_rows}
    after = {(r['name'], r['type']): r for r in new_rows}

    result = csvdiff.diff_records(before, after)

    self.assertEqual(result['added'], [{'name': 'd', 'sheep': 8, 'type': 1}])
    self.assertEqual(result['removed'], [{'name': 'b', 'sheep': 12, 'type': 1}])
    self.assertEqual(
        result['changed'],
        [{'key': ('c', 1), 'fields': {'sheep': {'from': 0, 'to': 2}}}],
    )
    # The diff exposes exactly these three sections and nothing else.
    self.assertEqual(set(result), {'added', 'removed', 'changed'})
def compare_data(latest, old):
    """Diff the rows of two pandas dataframes with csvdiff.

    Rows containing missing values are dropped from both inputs. Each
    remaining row gets an ``id`` column holding the hash of its values, and
    the row records are diffed using ``id`` as the only key column.

    Returns the result of ``csvdiff.diff_records`` going from ``old`` to
    ``latest``.
    """

    def row_hash(row):
        # Identify a row by the hash of all of its values.
        return hash(tuple(row))

    latest_rows = latest.dropna()
    old_rows = old.dropna()

    latest_rows['id'] = latest_rows.apply(row_hash, axis=1)
    old_rows['id'] = old_rows.apply(row_hash, axis=1)

    return csvdiff.diff_records(
        old_rows.to_dict("records"),
        latest_rows.to_dict("records"),
        ['id'],
    )
def compare_data(old, new):
    """Diff the rows of two pandas dataframes with csvdiff.

    Rows containing missing values are dropped from both inputs. Each
    remaining row gets an ``id`` column holding the hash of its values, and
    the row records are diffed using ``id`` as the only key column.

    Returns the result of ``csvdiff.diff_records`` going from ``old`` to
    ``new``.
    """

    def row_hash(row):
        # Identify a row by the hash of all of its values.
        return hash(tuple(row))

    new_rows = new.dropna()
    old_rows = old.dropna()

    new_rows['id'] = new_rows.apply(row_hash, axis=1)
    old_rows['id'] = old_rows.apply(row_hash, axis=1)

    return csvdiff.diff_records(
        old_rows.to_dict("records"),
        new_rows.to_dict("records"),
        ['id'],
    )
def test_diff_records_str_values(self):
    """Check diffing and patching of records whose values are all strings."""

    def row(name, sheep):
        return {'name': name, 'sheep': sheep}

    old_rows = [row('a', '7'), row('b', '12'), row('c', '0')]
    new_rows = [row('a', '7'), row('c', '2'), row('d', '8')]

    result = csvdiff.diff_records(old_rows, new_rows, ['name'])

    assert patch.is_valid(result)
    assert not patch.is_typed(result)

    # Inspect each section of the diff.
    self.assertEqual(result['added'], [{'name': 'd', 'sheep': '8'}])
    self.assertEqual(result['removed'], [{'name': 'b', 'sheep': '12'}])
    self.assertEqual(
        result['changed'],
        [{'key': ['c'], 'fields': {'sheep': {'from': '0', 'to': '2'}}}],
    )

    # Patching the old rows with the diff must reproduce the new rows.
    patched_rows = csvdiff.patch_records(result, old_rows)
    self.assertRecordsEqual(new_rows, patched_rows)
def run(self, dump: bool):
    """Run the test, then either dump its result or diff it table by table.

    Args:
        dump: When true, only ``dump_result`` is called after the run.
            Otherwise every table named in ``bas_result()`` is diffed
            against the same-named table in ``reg_result()``, using the
            key columns from ``table_keys()``, and each diff is handed to
            ``assert_results``.
    """
    self.test_run()

    if dump:
        self.dump_result()
        return

    bas_tables = self.bas_result()
    reg_tables = self.reg_result()
    key_columns = self.table_keys()

    for table in bas_tables:
        bas_rows = bas_tables[table]
        table_key = key_columns[table]
        reg_rows = reg_tables[table]
        table_diff = csvdiff.diff_records(bas_rows, reg_rows, table_key)
        assert_results(table_diff, table)
def test_summarize_identical(self):
    """Summarizing a diff of records against themselves reports identity."""
    records = [
        {'name': 'a', 'sheep': '7'},
        {'name': 'b', 'sheep': '12'},
        {'name': 'c', 'sheep': '0'},
    ]

    # Both sides of the diff are the same list.
    delta = csvdiff.diff_records(records, records, ['name'])
    assert patch.is_valid(delta)
    assert not patch.is_typed(delta)

    buffer = StringIO()
    csvdiff._summarize_diff(delta, len(records), stream=buffer)

    self.assertEqual(buffer.getvalue(), 'files are identical\n')
def test_diff_records(self):
    """Diff two mappings of records indexed by ``name``."""
    old_rows = [
        {'name': 'a', 'sheep': 7},
        {'name': 'b', 'sheep': 12},
        {'name': 'c', 'sheep': 0},
    ]
    new_rows = [
        {'name': 'a', 'sheep': 7},
        {'name': 'c', 'sheep': 2},
        {'name': 'd', 'sheep': 8},
    ]
    # Index each record by its name.
    before = {r['name']: r for r in old_rows}
    after = {r['name']: r for r in new_rows}

    result = csvdiff.diff_records(before, after)

    self.assertEqual(result['added'], [{'name': 'd', 'sheep': 8}])
    self.assertEqual(result['removed'], [{'name': 'b', 'sheep': 12}])
    self.assertEqual(
        result['changed'],
        [{'key': 'c', 'fields': {'sheep': {'from': 0, 'to': 2}}}],
    )
    # The diff exposes exactly these three sections and nothing else.
    self.assertEqual(set(result), {'added', 'removed', 'changed'})
def test_summarize(self):
    """Check the text summary of a diff with one removal, addition and change."""
    before = {
        'a': {'name': 'a', 'sheep': 7},
        'b': {'name': 'b', 'sheep': 12},
        'c': {'name': 'c', 'sheep': 0},
    }
    after = {
        'a': {'name': 'a', 'sheep': 7},
        'c': {'name': 'c', 'sheep': 2},
        'd': {'name': 'd', 'sheep': 8},
    }
    expected_summary = (
        "1 rows removed (33.3%)\n"
        "1 rows added (33.3%)\n"
        "1 rows changed (33.3%)\n"
    )

    delta = csvdiff.diff_records(before, after)

    # The summary is written to an in-memory stream for inspection.
    buffer = StringIO()
    csvdiff.summarize_diff(delta, len(before), stream=buffer)

    self.assertEqual(buffer.getvalue(), expected_summary)
def test_summarize_identical(self):
    """A diff of a record list against itself summarizes as identical files."""

    def row(name, sheep):
        return {'name': name, 'sheep': sheep}

    rows = [row('a', '7'), row('b', '12'), row('c', '0')]

    # The same list is passed as both the left and right side.
    result = csvdiff.diff_records(rows, rows, ['name'])
    assert patch.is_valid(result)
    assert not patch.is_typed(result)

    out = StringIO()
    csvdiff._summarize_diff(result, len(rows), stream=out)
    summary = out.getvalue()

    self.assertEqual(summary, 'files are identical\n')
def test_summarize(self):
    """Check the text summary for a diff of string-valued record lists."""
    before = [
        {'name': 'a', 'sheep': '7'},
        {'name': 'b', 'sheep': '12'},
        {'name': 'c', 'sheep': '0'},
    ]
    after = [
        {'name': 'a', 'sheep': '7'},
        {'name': 'c', 'sheep': '2'},
        {'name': 'd', 'sheep': '8'},
    ]
    expected_summary = (
        "1 rows removed (33.3%)\n"
        "1 rows added (33.3%)\n"
        "1 rows changed (33.3%)\n"
    )

    delta = csvdiff.diff_records(before, after, ['name'])
    assert patch.is_valid(delta)
    assert not patch.is_typed(delta)

    # The summary is written to an in-memory stream for inspection.
    buffer = StringIO()
    csvdiff._summarize_diff(delta, len(before), stream=buffer)

    self.assertEqual(buffer.getvalue(), expected_summary)
def test_summarize(self):
    """Summarize a diff containing one removed, one added and one changed row."""

    def row(name, sheep):
        return {'name': name, 'sheep': sheep}

    old_rows = [row('a', '7'), row('b', '12'), row('c', '0')]
    new_rows = [row('a', '7'), row('c', '2'), row('d', '8')]

    result = csvdiff.diff_records(old_rows, new_rows, ['name'])
    assert patch.is_valid(result)
    assert not patch.is_typed(result)

    out = StringIO()
    csvdiff._summarize_diff(result, len(old_rows), stream=out)
    summary = out.getvalue()

    # The row count passed in is three, hence the 33.3% figures.
    self.assertEqual(
        summary,
        "1 rows removed (33.3%)\n"
        "1 rows added (33.3%)\n"
        "1 rows changed (33.3%)\n",
    )
def test_diff_records(self):
    """Diff two name-indexed record mappings and inspect every section."""
    before = {
        'a': {'name': 'a', 'sheep': 7},
        'b': {'name': 'b', 'sheep': 12},
        'c': {'name': 'c', 'sheep': 0},
    }
    after = {
        'a': {'name': 'a', 'sheep': 7},
        'c': {'name': 'c', 'sheep': 2},
        'd': {'name': 'd', 'sheep': 8},
    }
    expected_added = [{'name': 'd', 'sheep': 8}]
    expected_removed = [{'name': 'b', 'sheep': 12}]
    expected_changed = [
        {'key': 'c', 'fields': {'sheep': {'from': 0, 'to': 2}}},
    ]

    delta = csvdiff.diff_records(before, after)

    self.assertEqual(delta['added'], expected_added)
    self.assertEqual(delta['removed'], expected_removed)
    self.assertEqual(delta['changed'], expected_changed)
    # No sections beyond the three checked above.
    self.assertEqual(set(delta), {'added', 'removed', 'changed'})
def test_diff_records_multikey(self):
    """Diff record mappings indexed by tuple keys and inspect every section."""
    before = {
        ('a', 1): {'name': 'a', 'type': 1, 'sheep': 7},
        ('b', 1): {'name': 'b', 'type': 1, 'sheep': 12},
        ('c', 1): {'name': 'c', 'type': 1, 'sheep': 0},
    }
    after = {
        ('a', 1): {'name': 'a', 'type': 1, 'sheep': 7},
        ('c', 1): {'name': 'c', 'type': 1, 'sheep': 2},
        ('d', 1): {'name': 'd', 'type': 1, 'sheep': 8},
    }
    expected_added = [{'name': 'd', 'sheep': 8, 'type': 1}]
    expected_removed = [{'name': 'b', 'sheep': 12, 'type': 1}]
    expected_changed = [
        {'key': ('c', 1), 'fields': {'sheep': {'from': 0, 'to': 2}}},
    ]

    delta = csvdiff.diff_records(before, after)

    self.assertEqual(delta['added'], expected_added)
    self.assertEqual(delta['removed'], expected_removed)
    self.assertEqual(delta['changed'], expected_changed)
    # No sections beyond the three checked above.
    self.assertEqual(set(delta), {'added', 'removed', 'changed'})
def test_summarize(self):
    """Summarize a diff of two name-indexed record mappings."""
    old_rows = [
        {'name': 'a', 'sheep': 7},
        {'name': 'b', 'sheep': 12},
        {'name': 'c', 'sheep': 0},
    ]
    new_rows = [
        {'name': 'a', 'sheep': 7},
        {'name': 'c', 'sheep': 2},
        {'name': 'd', 'sheep': 8},
    ]
    # Index each record by its name.
    before = {r['name']: r for r in old_rows}
    after = {r['name']: r for r in new_rows}

    result = csvdiff.diff_records(before, after)

    out = StringIO()
    csvdiff.summarize_diff(result, len(before), stream=out)
    summary = out.getvalue()

    self.assertEqual(
        summary,
        "1 rows removed (33.3%)\n"
        "1 rows added (33.3%)\n"
        "1 rows changed (33.3%)\n",
    )