def get_data(self, run_id, data_type=None, groupby=False, start=None, size=None):
    if size is not None and start is None:
        start = 0
    self.flush()
    filename = os.path.join(self.directory, run_id + "-db.json")
    if not os.path.exists(filename):
        # Returning ends the generator; raising StopIteration inside a
        # generator is a RuntimeError under PEP 479 (Python 3.7+).
        return
    self._update_headers(run_id)

    def _filtered(data):
        return data_type is not None and data_type != data.get("data_type")

    if not groupby:
        for data in self._batch(filename, start, size, _filtered, run_id=run_id):
            yield data
    else:
        # Collapse duplicate records: data that is identical once "count" is
        # ignored hashes to the same key, and "count" tracks the repeats.
        result = {}
        for data in self._batch(filename, start, size, _filtered, run_id=run_id):
            data_hash = dict_hash(data, ["count"])
            if data_hash in result:
                result[data_hash]["count"] += 1
            else:
                data["count"] = 1
                result[data_hash] = data
        for data in result.values():
            yield data
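# A minimal, self-contained sketch of the groupby aggregation above: records
# that are identical once "count" is ignored collapse into a single entry
# whose "count" records how many times they appeared. _group_key is a
# hypothetical stand-in for the real dict_hash helper, kept inline so the
# snippet runs on its own.
def _group_key(record, omit_keys):
    # Canonical, hashable key built from every field except the omitted ones.
    return tuple(sorted(
        (repr(k), repr(v)) for k, v in record.items() if k not in omit_keys
    ))


records = [
    {"data_type": "event", "value": 1},
    {"data_type": "event", "value": 1},
    {"data_type": "event", "value": 2},
]

grouped = {}
for record in records:
    key = _group_key(record, ["count"])
    if key in grouped:
        grouped[key]["count"] += 1
    else:
        record["count"] = 1
        grouped[key] = record

for record in grouped.values():
    print(record)
# -> {'data_type': 'event', 'value': 1, 'count': 2}
# -> {'data_type': 'event', 'value': 2, 'count': 1}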
def test_dict_hash(self):
    data1 = {1: 2, 3: 4}
    data2 = {1: 2, 3: 4}
    self.assertEqual(dict_hash(data1), dict_hash(data2))
    data1['count'] = 'b'
    self.assertNotEqual(dict_hash(data1), dict_hash(data2))
    self.assertEqual(dict_hash(data1, omit_keys=['count']), dict_hash(data2))
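# dict_hash itself is not shown in this excerpt. A minimal sketch consistent
# with the test above (equal dicts hash equally, any added key changes the
# hash, and omit_keys excludes fields such as 'count') might look like the
# following; the real implementation may differ.
import hashlib


def dict_hash(data, omit_keys=None):
    """Stable digest of a dict, ignoring any keys listed in omit_keys."""
    omit_keys = omit_keys or []
    # Sort on the repr of each key/value so mixed key types (ints and strings)
    # still order deterministically.
    items = sorted(
        (repr(k), repr(v)) for k, v in data.items() if k not in omit_keys
    )
    canonical = "|".join("{}={}".format(k, v) for k, v in items)
    return hashlib.sha1(canonical.encode("utf-8")).hexdigest()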