Ejemplo n.º 1
0
 def test_skips_over_corrupted_lines(self):
     """Read from a file whose final line is malformed JSON.

     Four records produced by ``_to_json(make_sample_batch(i), [])`` are
     written one per line, followed by a broken record with no trailing
     newline. Ten batches are then pulled from the reader and the test
     asserts that exactly four distinct first-action values were seen.
     """
     path = self.test_dir + "/f1"
     with open(path, "w") as out:
         for idx in range(4):
             out.write(_to_json(make_sample_batch(idx), []))
             out.write("\n")
         # Deliberately malformed tail record, left unterminated.
         out.write("{..corrupted_json_record")

     reader = JsonReader([path])

     # Pull more batches than there are records; the assertion below
     # presumably relies on each sample batch having a distinct first
     # action -- confirm against make_sample_batch.
     first_actions = set()
     for _ in range(10):
         first_actions.add(reader.next()["actions"][0])
     self.assertEqual(len(first_actions), 4)
Ejemplo n.º 2
0
 def test_skips_over_empty_lines_and_files(self):
     """Read from a mix of an empty file, blank lines and real records.

     Three inputs are prepared: a zero-length file, a file that starts
     with two blank lines followed by one record (no trailing newline),
     and a file holding one newline-terminated record. The last one is
     handed to the reader with a ``file://`` prefix. After pulling 100
     batches the test asserts exactly two distinct first-action values.
     """
     base = self.test_dir
     empty_path = base + "/empty"
     first_path = base + "/f1"
     second_path = base + "/f2"

     # Create the zero-length input.
     with open(empty_path, "w"):
         pass

     with open(first_path, "w") as out:
         for _ in range(2):
             out.write("\n")
         out.write(_to_json(make_sample_batch(0), []))

     with open(second_path, "w") as out:
         out.write(_to_json(make_sample_batch(1), []))
         out.write("\n")

     inputs = [empty_path, first_path, "file://" + second_path]
     reader = JsonReader(inputs)

     first_actions = {reader.next()["actions"][0] for _ in range(100)}
     self.assertEqual(len(first_actions), 2)
Ejemplo n.º 3
0
 def testSkipsOverCorruptedLines(self):
     """Check reading continues when the input ends in a broken record.

     Writes four ``_to_json`` records (one per line) plus an unterminated
     malformed line to ``f1``, requests ten batches from a ``JsonReader``
     over that file, and asserts four distinct first-action values.
     """
     target = self.test_dir + "/f1"
     with open(target, "w") as handle:
         for batch_id in (0, 1, 2, 3):
             handle.write(_to_json(make_sample_batch(batch_id), []))
             handle.write("\n")
         # Malformed record at the end of the file, without a newline.
         handle.write("{..corrupted_json_record")

     reader = JsonReader([target])

     # Ten reads against four good records; only the number of distinct
     # first actions is checked.
     distinct = {reader.next()["actions"][0] for _ in range(10)}
     self.assertEqual(len(distinct), 4)
Ejemplo n.º 4
0
 def testSkipsOverEmptyLinesAndFiles(self):
     """Check reading works across empty files and blank lines.

     Prepares an empty file, a file with two leading blank lines and one
     record, and a file with one newline-terminated record (passed with a
     ``file:`` prefix). Pulls 100 batches and asserts that two distinct
     first-action values were observed.
     """
     root = self.test_dir
     empty_file = root + "/empty"
     blank_then_record = root + "/f1"
     single_record = root + "/f2"

     # Zero-length input file.
     with open(empty_file, "w"):
         pass

     with open(blank_then_record, "w") as handle:
         handle.write("\n")
         handle.write("\n")
         handle.write(_to_json(make_sample_batch(0), []))

     with open(single_record, "w") as handle:
         handle.write(_to_json(make_sample_batch(1), []))
         handle.write("\n")

     reader = JsonReader([
         empty_file,
         blank_then_record,
         "file:" + single_record,
     ])

     distinct = set()
     for _ in range(100):
         sample = reader.next()
         distinct.add(sample["actions"][0])
     self.assertEqual(len(distinct), 2)
Ejemplo n.º 5
0
 def testSkipsOverCorruptedLines(self):
     """Check reading continues past a broken trailing record.

     Builds an ``IOContext`` for the test directory, writes four
     ``_to_json`` records followed by an unterminated malformed line to
     ``f1``, then pulls ten batches from a ``JsonReader`` created with
     that context and asserts four distinct first-action values.
     """
     # Context is created before any file I/O, as in the original order.
     ioctx = IOContext(self.test_dir, {}, 0, None)

     data_path = self.test_dir + "/f1"
     with open(data_path, "w") as sink:
         for n in range(4):
             sink.write(_to_json(make_sample_batch(n), []))
             sink.write("\n")
         # Malformed final record, no trailing newline.
         sink.write("{..corrupted_json_record")

     reader = JsonReader(ioctx, [data_path])

     observed = set()
     for _ in range(10):
         observed.add(reader.next()["actions"][0])
     self.assertEqual(len(observed), 4)
Ejemplo n.º 6
0
 def testSkipsOverEmptyLinesAndFiles(self):
     """Check reading works across an empty file and blank lines.

     Builds an ``IOContext`` for the test directory, then prepares an
     empty file, a file with two blank lines before its single record,
     and a file with one newline-terminated record (passed with a
     ``file:`` prefix). Pulls 100 batches from the reader and asserts two
     distinct first-action values.
     """
     # Context is created before any file I/O, as in the original order.
     ioctx = IOContext(self.test_dir, {}, 0, None)

     folder = self.test_dir
     path_empty = folder + "/empty"
     path_one = folder + "/f1"
     path_two = folder + "/f2"

     # Zero-length input file.
     with open(path_empty, "w"):
         pass

     with open(path_one, "w") as sink:
         for _ in range(2):
             sink.write("\n")
         sink.write(_to_json(make_sample_batch(0), []))

     with open(path_two, "w") as sink:
         sink.write(_to_json(make_sample_batch(1), []))
         sink.write("\n")

     sources = [path_empty, path_one, "file:" + path_two]
     reader = JsonReader(ioctx, sources)

     observed = {reader.next()["actions"][0] for _ in range(100)}
     self.assertEqual(len(observed), 2)