Esempio n. 1
0
        def line_group_generator(input_path):
            # Yield groups of lines read from input_path through
            # read_text_input().
            #
            # When keep_sorted is false, every line forms its own group and
            # is yielded as a 1-tuple. When keep_sorted is true, consecutive
            # lines sharing the same key are yielded together as a single
            # itertools.groupby group (an iterator that is only valid until
            # the next group is requested).
            if not keep_sorted:
                for single_line in read_text_input(input_path):
                    yield (single_line, )
                return

            def key_before_tab(raw_line):
                # Input is assumed to be key <tab> value pairs: the key is
                # everything before the first tab, or the whole line when
                # there is no tab at all.
                return raw_line.partition(b'\t')[0]

            all_lines = read_text_input(input_path)
            for _key, same_key_lines in itertools.groupby(all_lines,
                                                          key_before_tab):
                yield same_key_lines
Esempio n. 2
0
 def test_bad_glob(self):
     # The reader is created outside the assertion on purpose:
     # read_text_input() returns a generator, so the IOError is only
     # expected once something actually consumes it.
     bad_glob = os.path.join(self.tmpdir, 'lions*')
     lazy_lines = read_text_input(bad_glob)

     with self.assertRaises(IOError):
         list(lazy_lines)
Esempio n. 3
0
 def test_glob_including_dir(self):
     # Reading everything matched by 'beavers*' under tmpdir should give
     # BEAVER_DATA four times. The fixture layout is created elsewhere;
     # going by the test name, one of the matches is presumably a
     # directory -- confirm against the class's setUp.
     pattern = os.path.join(self.tmpdir, 'beavers*')
     actual = list(read_text_input(pattern))
     expected = [self.BEAVER_DATA] * 4
     self.assertEqual(actual, expected)
Esempio n. 4
0
 def test_dir_recursion(self):
     # Pointing read_text_input() at tmpdir itself should give BEAVER_DATA
     # four times (the directory contents are set up elsewhere; presumably
     # some of the files live in subdirectories, per the test name).
     actual = list(read_text_input(self.tmpdir))
     expected = [self.BEAVER_DATA] * 4
     self.assertEqual(actual, expected)
Esempio n. 5
0
 def test_bz2_file(self):
     # Reading the 'beavers.bz2' fixture in tmpdir should give exactly one
     # item equal to BEAVER_DATA (the fixture is created elsewhere;
     # presumably it is the bz2-compressed form of BEAVER_DATA).
     bz2_path = os.path.join(self.tmpdir, 'beavers.bz2')
     actual = list(read_text_input(bz2_path))
     self.assertEqual(actual, [self.BEAVER_DATA])
Esempio n. 6
0
 def test_stdin_can_be_iterator(self):
     # With '-' as the path, a plain list passed as stdin should come back
     # item for item.
     fake_stdin = [self.BEAVER_DATA] * 5
     actual = list(read_text_input('-', stdin=fake_stdin))
     self.assertEqual(actual, [self.BEAVER_DATA] * 5)
Esempio n. 7
0
 def test_stdin(self):
     # With '-' as the path, a file-like stdin wrapping BEAVER_DATA should
     # produce a single item equal to BEAVER_DATA.
     fake_stdin = BytesIO(self.BEAVER_DATA)
     actual = list(read_text_input('-', stdin=fake_stdin))
     self.assertEqual(actual, [self.BEAVER_DATA])