def test_simple(self):
    """Run CSVClean on ``examples/bad.csv`` and check both generated files.

    The error file must contain, after its first line, exactly two lines
    starting with ``'1'`` and ``'2'``; the output file must contain, after
    its first line, exactly one line starting with ``'0'``.
    """
    err_path = 'examples/bad_err.csv'
    out_path = 'examples/bad_out.csv'

    args = ['examples/bad.csv']
    output_file = six.StringIO()
    utility = CSVClean(args, output_file)

    # Everything that can create or inspect the files lives inside the try
    # so that a failed run or assertion never leaves artifacts behind.
    try:
        utility.main()

        self.assertTrue(os.path.exists(err_path))
        self.assertTrue(os.path.exists(out_path))

        with open(err_path) as f:
            next(f)  # skip the first line (presumably the header row)
            self.assertEqual(next(f)[0], '1')
            self.assertEqual(next(f)[0], '2')
            self.assertRaises(StopIteration, next, f)

        with open(out_path) as f:
            next(f)  # skip the first line (presumably the header row)
            self.assertEqual(next(f)[0], '0')
            self.assertRaises(StopIteration, next, f)
    finally:
        # Cleanup: only remove what exists, so a missing file cannot raise
        # here and hide the assertion that actually failed.
        for path in (err_path, out_path):
            if os.path.exists(path):
                os.remove(path)
def assertCleaned(self, basename, output_lines, error_lines, additional_args=None):
    """Run CSVClean on ``examples/<basename>.csv`` and verify the files it leaves.

    Args:
        basename: Stem of the input file under ``examples/``.
        output_lines: Exact lines expected in ``examples/<basename>_out.csv``.
            A falsy value means that file must not exist.
        error_lines: Exact lines expected in ``examples/<basename>_err.csv``.
            A falsy value means that file must not exist.
        additional_args: Optional sequence of extra command-line arguments
            appended after the input path.
    """
    # None instead of a mutable [] default; copy so the caller's list is untouched.
    extra_args = [] if additional_args is None else list(additional_args)
    args = ['examples/%s.csv' % basename] + extra_args

    output_path = 'examples/%s_out.csv' % basename
    error_path = 'examples/%s_err.csv' % basename

    try:
        stream = six.StringIO()
        utility = CSVClean(args, stream)
        utility.run()
        stream.close()

        self.assertEqual(os.path.exists(output_path), bool(output_lines))
        self.assertEqual(os.path.exists(error_path), bool(error_lines))

        for path, expected_lines in ((output_path, output_lines),
                                     (error_path, error_lines)):
            if not expected_lines:
                continue
            with open(path) as f:
                for line in expected_lines:
                    self.assertEqual(next(f), line)
                # The file must contain nothing beyond the expected lines.
                self.assertRaises(StopIteration, next, f)
    finally:
        # Clean up by what is actually on disk, not by what was expected,
        # so an unexpected file does not leak into later tests.
        for path in (output_path, error_path):
            if os.path.exists(path):
                os.remove(path)
def test_dry_run(self):
    """Run CSVClean with ``-n`` on ``examples/bad.csv``.

    Neither ``bad_err.csv`` nor ``bad_out.csv`` may exist afterwards, and the
    first two lines written to the output stream must start with ``Line 1``
    and ``Line 2`` respectively.
    """
    captured = six.StringIO()
    CSVClean(['-n', 'examples/bad.csv'], captured).main()

    for leftover in ('examples/bad_err.csv', 'examples/bad_out.csv'):
        self.assertFalse(os.path.exists(leftover))

    # Re-wrap the captured text so it can be consumed line by line.
    report = six.StringIO(captured.getvalue())
    for expected_prefix in ('Line 1', 'Line 2'):
        self.assertEqual(next(report)[:6], expected_prefix)
def clean_csv(csv_path, dry_run=True, encoding=None):
    """Run CSVClean on *csv_path*, retrying with ``ENCODINGS`` on decode errors.

    The first attempt uses the given ``dry_run`` and ``encoding``. If it
    raises ``UnicodeDecodeError``, each entry of ``ENCODINGS`` is tried once,
    in order, until one attempt completes.

    Args:
        csv_path: Path of the CSV file passed to CSVClean.
        dry_run: When true, ``-n`` is passed on the first attempt.
        encoding: Optional encoding passed via ``-e`` on the first attempt.

    Returns:
        0 if any attempt completed without ``UnicodeDecodeError``, otherwise 1.
        Any other exception raised by CSVClean propagates unchanged.
    """

    def _attempt(attempt_dry_run, attempt_encoding):
        """Run CSVClean once; return False only when decoding failed."""
        args = [csv_path]
        if attempt_dry_run:
            args.append("-n")
        if attempt_encoding:
            args.extend(["-e", attempt_encoding])
        utility = CSVClean(args, io.StringIO())
        try:
            utility.run()
        except UnicodeDecodeError as exc:
            logger.error(exc)
            return False
        return True

    if _attempt(dry_run, encoding):
        return 0

    # Try each fallback exactly once. Iterating here (rather than recursing
    # into clean_csv) keeps a failing encoding from restarting the whole
    # fallback loop and recursing without bound.
    # NOTE(review): fallback attempts always run with dry_run disabled,
    # regardless of the caller's dry_run flag; this is carried over from the
    # previous behaviour -- confirm that writing files here is intended.
    for enc in ENCODINGS:
        if _attempt(False, enc):
            return 0
    return 1
def launch_new_instance():
    """Construct a CSVClean with no explicit arguments and invoke its ``main()``."""
    CSVClean().main()