def test_header_only_input_one_output_file_with_header(self):
    # An input holding nothing but the header row must still yield a first
    # output file, and that file must carry the header.
    source = ReaderWriter()
    header_row = [u'a', u'b']
    source.writerow(header_row)

    m.split(source, prefix='split.', chunk_size=1)

    first_file_header = header(u'split.0')
    self.assertEqual(u'a,b', first_file_header)
def test_multiple_output_files_have_same_header(self):
    # Two data rows with chunk_size=1 are expected to land in two files;
    # each of them must start with the same header as the input.
    source = ReaderWriter()
    source.writerow([u'a', u'b'])
    for record in ([1, 2], [3, 4]):
        source.writerow(record)

    m.split(source, prefix='split.', chunk_size=1)

    for file_name in (u'split.0', u'split.1'):
        self.assertEqual(u'a,b', header(file_name))
def test_less_data_rows_than_chunk_size_one_file_created(self):
    # Two data rows with chunk_size=3: everything fits in the first chunk,
    # so only split.0 may exist afterwards.
    source = ReaderWriter()
    source.writerow([u'a', u'b'])
    for record in ([1, 2], [3, 4]):
        source.writerow(record)

    m.split(source, prefix='split.', chunk_size=3)

    first_exists = os.path.exists(u'split.0')
    second_exists = os.path.exists(u'split.1')
    self.assertTrue(first_exists)
    self.assertFalse(second_exists)
def test_output_file_contains_rows_from_input(self):
    # With chunk_size=2 both data rows belong to the first output file,
    # which must read back as header + the two rows (as text cells).
    source = ReaderWriter()
    source.writerow([u'a', u'b'])
    source.writerow([1, 2])
    source.writerow([3, 4])

    m.split(source, prefix='split.', chunk_size=2)

    expected_rows = [[u'a', u'b'], [u'1', u'2'], [u'3', u'4']]
    with codecs.open('split.0', encoding='utf8') as chunk_file:
        actual_rows = list(csv.reader(chunk_file))
        self.assertEqual(expected_rows, actual_rows)
def test_11_data_rows_chunk_size_1_11_files_created(self):
    # Eleven data rows split one row per file must produce exactly eleven
    # output files, split.0 through split.10, and no split.11.
    rows = ReaderWriter()
    rows.writerow(u'a b'.split())
    for i in range(11):
        rows.writerow([i, i + 1])

    m.split(rows, prefix='split.', chunk_size=1)

    # Check every expected file rather than a sample, so a gap in the
    # middle of the sequence is caught; the file name doubles as the
    # failure message to show which chunk is missing.
    for index in range(11):
        file_name = u'split.%d' % index
        self.assertTrue(os.path.exists(file_name), file_name)
    self.assertFalse(os.path.exists(u'split.11'))
def test_output_file_contains_rows_from_input(self):
    # Header plus two data rows, chunk_size=2: split.0 is expected to
    # contain all three rows, with numeric cells read back as strings.
    table = ReaderWriter()
    for record in (u'a b'.split(), [1, 2], [3, 4]):
        table.writerow(record)

    m.split(table, prefix='split.', chunk_size=2)

    with codecs.open('split.0', encoding='utf8') as handle:
        parsed = list(csv.reader(handle))
        self.assertEqual(
            [[u'a', u'b'], [u'1', u'2'], [u'3', u'4']],
            parsed,
        )
def test_out_spec(self):
    # Unzip a three-column input on column 'a' and check what ends up in
    # the "specified columns" output: an id column followed by 'a'.
    source = ReaderWriter()
    for record in (['a', 'b', 'c'], ['a1', 'b1', 'c1'], ['a2', 'b2', 'c2']):
        source.writerow(record)

    spec_out = ReaderWriter()
    unspec_out = ReaderWriter()
    m.unzip(source, ['a'], spec_out, unspec_out)

    expected_rows = [['id', 'a'], ['0', 'a1'], ['1', 'a2']]
    self.assertListEqual(expected_rows, spec_out.rows)
def csv_header_a_b_c(self):
    # Fixture helper: returns a fresh ReaderWriter to which only the
    # header row a, b, c has been written.
    table = ReaderWriter()
    table.writerow(['a', 'b', 'c'])
    return table