def test_skips_over_corrupted_lines(self):
    # Writes four sample batches (one JSON record per line) followed by a
    # malformed trailing record, then reads from that file ten times and
    # expects exactly four distinct leading actions to show up.
    path = self.test_dir + "/f1"
    with open(path, "w") as out:
        for idx in range(4):
            out.write(_to_json(make_sample_batch(idx), []))
            out.write("\n")
        # Deliberately malformed final record, written without a newline.
        out.write("{..corrupted_json_record")

    reader = JsonReader([path])
    # Ten reads against four valid records: presumably the reader skips the
    # bad line and starts over -- that behaviour lives in JsonReader.
    leading_actions = {reader.next()["actions"][0] for _ in range(10)}
    self.assertEqual(len(leading_actions), 4)
def test_skips_over_empty_lines_and_files(self):
    # Inputs: a zero-byte file, a file that opens with two blank lines before
    # its single record, and a one-record file addressed with a "file://"
    # prefix. A hundred reads are expected to surface exactly two distinct
    # leading actions.
    base = self.test_dir
    empty_path = base + "/empty"
    first_path = base + "/f1"
    second_path = base + "/f2"

    with open(empty_path, "w"):
        pass  # create/truncate only; nothing is written

    with open(first_path, "w") as out:
        out.write("\n")
        out.write("\n")
        out.write(_to_json(make_sample_batch(0), []))

    with open(second_path, "w") as out:
        out.write(_to_json(make_sample_batch(1), []))
        out.write("\n")

    reader = JsonReader([
        empty_path,
        first_path,
        "file://" + second_path,
    ])

    observed = set()
    for _ in range(100):
        observed.add(reader.next()["actions"][0])
    self.assertEqual(len(observed), 2)
def testSkipsOverCorruptedLines(self):
    # Four serialized sample batches, each terminated by a newline, are
    # followed by one malformed record. Ten reads must then yield exactly
    # four distinct values of batch["actions"][0].
    target = self.test_dir + "/f1"

    with open(target, "w") as handle:
        payload = []
        for batch_id in (0, 1, 2, 3):
            payload.append(_to_json(make_sample_batch(batch_id), []))
            payload.append("\n")
        # The broken record goes last and has no newline after it.
        payload.append("{..corrupted_json_record")
        handle.writelines(payload)

    reader = JsonReader([target])

    first_actions = set()
    remaining = 10
    while remaining > 0:
        batch = reader.next()
        first_actions.add(batch["actions"][0])
        remaining -= 1
    self.assertEqual(len(first_actions), 4)
def testSkipsOverEmptyLinesAndFiles(self):
    # Three inputs are handed to the reader: an empty file, a file with two
    # leading blank lines and one record, and a one-record file referenced
    # through a "file:" prefix. Exactly two distinct leading actions are
    # expected over a hundred reads.
    root = self.test_dir

    # Touch the empty file (open for writing, close immediately).
    with open(root + "/empty", "w"):
        pass

    with open(root + "/f1", "w") as sink:
        for chunk in ("\n", "\n", _to_json(make_sample_batch(0), [])):
            sink.write(chunk)

    with open(root + "/f2", "w") as sink:
        for chunk in (_to_json(make_sample_batch(1), []), "\n"):
            sink.write(chunk)

    sources = [
        root + "/empty",
        root + "/f1",
        "file:" + root + "/f2",
    ]
    reader = JsonReader(sources)

    distinct = {reader.next()["actions"][0] for _ in range(100)}
    self.assertEqual(len(distinct), 2)
def testSkipsOverCorruptedLines(self):
    # Same scenario as a four-record file with a malformed tail, but the
    # reader is constructed with an IOContext as its first argument.
    # Ten reads must produce exactly four distinct leading actions.
    io_context = IOContext(self.test_dir, {}, 0, None)
    data_file = self.test_dir + "/f1"

    with open(data_file, "w") as fh:
        for n in range(4):
            fh.write(_to_json(make_sample_batch(n), []))
            fh.write("\n")
        # Trailing garbage that is not valid JSON; no newline follows it.
        fh.write("{..corrupted_json_record")

    reader = JsonReader(io_context, [data_file])

    seen = set()
    for _ in range(10):
        sample = reader.next()
        seen.add(sample["actions"][0])
    self.assertEqual(len(seen), 4)
def testSkipsOverEmptyLinesAndFiles(self):
    # IOContext-based variant: the reader gets an empty file, a file whose
    # single record is preceded by two blank lines, and a one-record file
    # given with a "file:" prefix. A hundred reads must show exactly two
    # distinct leading actions.
    io_context = IOContext(self.test_dir, {}, 0, None)

    empty_file = self.test_dir + "/empty"
    file_one = self.test_dir + "/f1"
    file_two = self.test_dir + "/f2"

    # Create the zero-byte input.
    with open(empty_file, "w"):
        pass

    with open(file_one, "w") as fh:
        fh.write("\n")
        fh.write("\n")
        fh.write(_to_json(make_sample_batch(0), []))

    with open(file_two, "w") as fh:
        fh.write(_to_json(make_sample_batch(1), []))
        fh.write("\n")

    reader = JsonReader(
        io_context,
        [empty_file, file_one, "file:" + file_two],
    )

    unique_actions = set()
    reads_left = 100
    while reads_left:
        unique_actions.add(reader.next()["actions"][0])
        reads_left -= 1
    self.assertEqual(len(unique_actions), 2)