def test_select_order(self):
    """Check that select() yields keys in the order they were requested."""
    date_first = test.list_of_dicts("downloads.json").select(
        "date", "downloads")
    downloads_first = test.list_of_dicts("downloads.json").select(
        "downloads", "date")
    # Only the first item is inspected in each result.
    keys_a = list(date_first[0].keys())
    assert keys_a == ["date", "downloads"]
    keys_b = list(downloads_first[0].keys())
    assert keys_b == ["downloads", "date"]
def test_inner_join(self):
    """Inner-join downloads with holidays on "date" and verify the result."""
    downloads = test.list_of_dicts("downloads.json")
    holiday_rows = test.list_of_dicts("holidays.json")
    joined = downloads.inner_join(holiday_rows, "date")
    assert len(joined) == 35
    # Every row in the result must carry the "holiday" key.
    assert not any("holiday" not in row for row in joined)
    total_downloads = sum(joined.pluck("downloads"))
    assert total_downloads == 18226489
def test_left_join(self):
    """Left-join downloads with holidays on "date" and verify the result."""
    downloads = test.list_of_dicts("downloads.json")
    holiday_rows = test.list_of_dicts("holidays.json")
    joined = downloads.left_join(holiday_rows, "date")
    assert len(joined) == 905
    # Count the rows that have a "holiday" key.
    with_holiday = [row for row in joined if "holiday" in row]
    assert len(with_holiday) == 35
    total_downloads = sum(joined.pluck("downloads"))
    assert total_downloads == 541335745
def test_full_join(self):
    """Full-join downloads with holidays on "date" and verify the result."""
    downloads = test.list_of_dicts("downloads.json")
    holiday_rows = test.list_of_dicts("holidays.json")
    joined = downloads.full_join(holiday_rows, "date")
    assert len(joined) == 930
    widest = max(map(len, joined))
    assert widest == 4
    with_holiday = [row for row in joined if "holiday" in row]
    assert len(with_holiday) == 60
    without_downloads = [row for row in joined if "downloads" not in row]
    assert len(without_downloads) == 25
    # Rows lacking "downloads" contribute the pluck default of 0.
    total_downloads = sum(joined.pluck("downloads", 0))
    assert total_downloads == 541335745
def test_left_join_by_tuple(self):
    """Left-join using a (left_key, right_key) tuple as the join key."""
    downloads = test.list_of_dicts("downloads.json")
    holiday_rows = test.list_of_dicts("holidays.json")
    # Rename the right-hand key so the two sides use different names.
    holiday_rows = holiday_rows.rename(holiday_date="date")
    join_keys = ("date", "holiday_date")
    joined = downloads.left_join(holiday_rows, join_keys)
    assert len(joined) == 905
    with_holiday = [row for row in joined if "holiday" in row]
    assert len(with_holiday) == 35
    # The right-hand key name must not appear in any result row.
    with_right_key = [row for row in joined if "holiday_date" in row]
    assert len(with_right_key) == 0
    total_downloads = sum(joined.pluck("downloads"))
    assert total_downloads == 541335745
def test_insert(self):
    """Insert a plain dict at index 100 and check what ends up there."""
    original = test.list_of_dicts("downloads.json")
    new_item = dict(date="3000-01-01")
    position = 100
    result = original.insert(position, new_item)
    assert len(result) == len(original) + 1
    inserted = result[position]
    assert isinstance(inserted, AttributeDict)
    assert inserted == new_item
def test_aggregate(self):
    """Group by "category" and aggregate date range and total downloads."""
    rows = test.list_of_dicts("downloads.json")
    grouped = rows.group_by("category")
    stat = grouped.aggregate(
        date_min=lambda group: min(group.pluck("date")),
        date_max=lambda group: max(group.pluck("date")),
        downloads=lambda group: sum(group.pluck("downloads")),
    )
    # All categories are expected to share one date range; only the
    # download totals differ, so the expected rows are built from pairs.
    totals_by_category = [
        ("Darwin", 6928129),
        ("Linux", 510902781),
        ("Windows", 13024960),
        ("null", 10421576),
        ("other", 58299),
    ]
    expected = [
        {
            "category": category,
            "date_min": "2019-09-16",
            "date_max": "2020-03-14",
            "downloads": downloads,
        }
        for category, downloads in totals_by_category
    ]
    assert stat == expected
def test_deepcopy_handle_predecessor(self):
    """Check _predecessor links across select() and deepcopy()."""
    source = test.list_of_dicts("downloads.json")
    selected = source.select("date")
    copied = selected.deepcopy()
    # A freshly loaded list has no predecessor.
    assert source._predecessor is None
    # select() links its result back to the list it came from.
    assert selected._predecessor is source
    # deepcopy() yields a list with no predecessor.
    assert copied._predecessor is None
def test___rmul__(self):
    """Check that ``2 * list`` repeats the list and keeps its type."""
    original = test.list_of_dicts("downloads.json")
    doubled = 2 * original
    assert isinstance(doubled, ListOfDicts)
    size = len(original)
    assert len(doubled) == size * 2
    # Both halves of the product must equal the original list.
    first_half = doubled[:size]
    assert first_half == original
    second_half = doubled[-size:]
    assert second_half == original
def test___setitem__(self):
    """Assign a plain dict by index and check how it is stored."""
    rows = test.list_of_dicts("downloads.json")
    new_item = dict(date="1970-01-01")
    rows[0] = new_item
    stored = rows[0]
    assert isinstance(stored, AttributeDict)
    # Equal in content, but not the same object as the dict assigned.
    assert stored == new_item
    assert stored is not new_item
def test_append(self):
    """Append a plain dict and check the new last item."""
    original = test.list_of_dicts("downloads.json")
    new_item = dict(date="3000-01-01")
    result = original.append(new_item)
    assert len(result) == len(original) + 1
    last = result[-1]
    assert isinstance(last, AttributeDict)
    assert last == new_item
def test_rename(self):
    """Rename "date" to "ymd" and check keys and the obsolete flag."""
    original = test.list_of_dicts("downloads.json")
    renamed = original.rename(ymd="date")
    assert len(renamed) == len(original)
    # The new key is present everywhere and the old key nowhere.
    assert not any("ymd" not in row for row in renamed)
    assert not any("date" in row for row in renamed)
    assert original._obsolete
def test_modify(self):
    """Add a computed "year" key with modify() and check the result."""
    original = test.list_of_dicts("downloads.json")
    # The year is the first four characters of the date, as an integer.
    modified = original.modify(year=lambda row: int(row.date[:4]))
    assert len(modified) == len(original)
    assert not any("year" not in row for row in modified)
    year_total = sum(modified.pluck("year"))
    assert year_total == 1827565
    assert original._obsolete
def test_deepcopy(self):
    """Check that deepcopy() gives equal but distinct list and items."""
    original = test.list_of_dicts("downloads.json")
    duplicate = original.deepcopy()
    assert duplicate == original
    assert duplicate is not original
    # Each item must also be an equal but separate object.
    for copied_item, source_item in zip(duplicate, original):
        assert copied_item == source_item
        assert copied_item is not source_item
def test_fill_missing_keys(self):
    """Fill a key removed from every other item and count its presence."""
    original = test.list_of_dicts("downloads.json")
    # Drop "downloads" from the items at even indices.
    for index, row in enumerate(original):
        if index % 2:
            continue
        del row.downloads
    filled = original.deepcopy().fill_missing_keys(downloads=0)
    # The source list keeps its gaps; only the filled copy is complete.
    remaining = [row for row in original if "downloads" in row]
    assert len(remaining) == len(original) // 2
    present = [row for row in filled if "downloads" in row]
    assert len(present) == len(original)
def test_modify_if(self):
    """Add "year" only to Linux items via modify_if() and check counts."""
    original = test.list_of_dicts("downloads.json")

    def is_linux(row):
        return row.category == "Linux"

    modified = original.modify_if(is_linux, year=lambda row: int(row.date[:4]))
    assert len(modified) == len(original)
    with_year = [row for row in modified if "year" in row]
    assert len(with_year) == 181
    # Items without "year" contribute the pluck default of 0.
    year_total = sum(modified.pluck("year", 0))
    assert year_total == 365513
    assert original._obsolete
def test_unselect(self):
    """Drop "date" and "downloads" via unselect() and check what is left."""
    original = test.list_of_dicts("downloads.json")
    dropped = ("date", "downloads")
    trimmed = original.unselect(*dropped)
    assert len(trimmed) == len(original)
    # Exactly one key is expected to remain in every item.
    assert not any(len(row) != 1 for row in trimmed)
    assert not any("date" in row for row in trimmed)
    assert not any("downloads" in row for row in trimmed)
    assert original._obsolete
def test_select(self):
    """Keep only "date" and "downloads" via select() and check the result."""
    original = test.list_of_dicts("downloads.json")
    kept = ("date", "downloads")
    narrowed = original.select(*kept)
    assert self.is_list_of_dicts(narrowed)
    assert len(narrowed) == len(original)
    # Every item has exactly the two selected keys.
    assert not any(len(row) != 2 for row in narrowed)
    assert not any("date" not in row for row in narrowed)
    assert not any("downloads" not in row for row in narrowed)
    assert original._obsolete
def test_sort_descending(self):
    """Sort by date and category, both descending, and check the ordering."""
    original = test.list_of_dicts("downloads.json")
    ordered = original.sort(date=-1, category=-1)
    assert len(ordered) == len(original)
    # Sorting must not introduce items that were not in the source.
    assert not any(row not in original for row in ordered)
    assert ordered.pluck("date") == sorted(ordered.pluck("date"), reverse=True)
    first = ordered[0]
    assert first.date == max(ordered.pluck("date"))
    last = ordered[-1]
    assert last.date == min(ordered.pluck("date"))
    assert ordered[0].category == max(ordered.pluck("category"))
    assert ordered[-1].category == min(ordered.pluck("category"))
def test_fill_missing_keys_all(self):
    """Call fill_missing_keys() with no arguments and check all keys return."""
    original = test.list_of_dicts("downloads.json")
    # Remove keys at different strides so items are missing different sets.
    for index, row in enumerate(original):
        if not index % 2:
            del row.category
        if not index % 4:
            del row.date
        if not index % 8:
            del row.downloads
    filled = original.deepcopy().fill_missing_keys()
    for row in filled:
        for key in ("category", "date", "downloads"):
            assert key in row
def test_sort_with_none_multiple_keys_descending(self):
    """Check where items with None sort keys land in a descending sort."""
    # Nones should be sorted group-wise last.
    original = test.list_of_dicts("downloads.json")
    no_category, no_date, neither = original[0], original[1], original[2]
    no_category.category = None
    no_date.date = None
    neither.category = None
    neither.date = None
    ordered = original.sort(date=-1, category=-1)
    assert ordered[-3] is original[0]
    assert ordered[-2] is original[1]
    assert ordered[-1] is original[2]
def list_of_dicts_read_json():
    """Return the seconds taken to load "vehicles.json".

    The file is loaded once through ``test.list_of_dicts`` and the result
    is discarded; only the elapsed time is returned.
    """
    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    test.list_of_dicts("vehicles.json")
    return time.perf_counter() - start
def list_of_dicts_read_csv():
    """Return the seconds taken to load "vehicles.csv".

    The file is loaded once through ``test.list_of_dicts`` and the result
    is discarded; only the elapsed time is returned.
    """
    # perf_counter() is monotonic and high-resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    test.list_of_dicts("vehicles.csv")
    return time.perf_counter() - start
def list_of_dicts(path, length=100_000):
    """Load *path* and return the data repeated and cut to *length* items.

    The loaded list is multiplied enough times to reach at least *length*
    items, and the first *length* of those are returned via ``head``.
    """
    base = test.list_of_dicts(path)
    # One extra repeat guarantees there are at least `length` items.
    repeats = length // len(base) + 1
    return (base * repeats).head(length)
def test_unique_by_all(self):
    """Check that unique() with no keys removes one appended duplicate."""
    rows = test.list_of_dicts("downloads.json")
    # Append the last item again so the list holds one duplicate.
    duplicate = rows[-1]
    rows = rows.append(duplicate)
    deduplicated = rows.unique()
    assert len(deduplicated) == len(rows) - 1
def test_write_pickle_path(self):
    """Smoke-test write_pickle() with a ``pathlib.Path`` destination.

    The test passes if the call completes without raising; the written
    file is not read back.
    """
    orig = test.list_of_dicts("downloads.json")
    # mkstemp() returns an open descriptor and leaves the file on disk;
    # a temporary directory leaks neither and is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        orig.write_pickle(Path(tmpdir) / "downloads.pkl")
def test_write_pickle(self):
    """Write the list to a pickle file, read it back and compare."""
    import os

    orig = test.list_of_dicts("downloads.json")
    # mkstemp() returns an open descriptor and leaves the file on disk;
    # a temporary directory leaks neither and is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "downloads.pkl")
        orig.write_pickle(path)
        data = ListOfDicts.read_pickle(path)
    assert data == orig
def test_write_csv_path(self):
    """Smoke-test write_csv() with a ``pathlib.Path`` destination.

    The test passes if the call completes without raising; the written
    file is not read back.
    """
    orig = test.list_of_dicts("vehicles.csv")
    # mkstemp() returns an open descriptor and leaves the file on disk;
    # a temporary directory leaks neither and is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        orig.write_csv(Path(tmpdir) / "vehicles.csv")
def test___getitem__(self):
    """Check the types returned by integer and slice indexing."""
    rows = test.list_of_dicts("downloads.json")
    # An integer index gives a single item.
    single = rows[0]
    assert isinstance(single, AttributeDict)
    # A slice gives another list of the same class.
    subset = rows[:100]
    assert isinstance(subset, ListOfDicts)
def test_write_csv(self):
    """Write the list to a CSV file, read it back and compare."""
    import os

    orig = test.list_of_dicts("vehicles.csv")
    # mkstemp() returns an open descriptor and leaves the file on disk;
    # a temporary directory leaks neither and is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, "vehicles.csv")
        orig.write_csv(path)
        data = ListOfDicts.read_csv(path)
    assert data == orig