def main(*args, **kwargs):
    """Run one conversion end to end and report the outcome.

    A ``Converter`` is built from ``kwargs``; the results of its ``head()``
    and ``take(10)`` calls are printed, then ``write()`` is invoked and the
    result is checked with ``validate()``.

    Positional ``args`` are accepted but not used.

    Raises:
        ValueError: If ``validate()`` returns a falsy value.
    """
    converter = Converter(**kwargs)

    # Show a preview of the input before writing anything.
    print(converter.head())
    print(converter.take(10))

    converter.write()

    # Guard clause: fail loudly when the written output does not validate.
    if not converter.validate():
        raise ValueError("Convert faild!")
    print("convert successed!")
def test_parquet_to_csv_overwrite(self):
    """Check a parquet -> CSV conversion in ``overwrite`` mode.

    The parquet fixture named by ``self.kwargs['parquet']`` is copied to
    ``self.parquet_copy``, converted to ``self.csv_copy``, and the CSV is
    read back (with a header row) so its RDD can be compared with the RDD
    of the converter's input DataFrame.
    """
    copytree(self.kwargs.get('parquet'), self.parquet_copy)
    ct = Converter(input=self.parquet_copy, output=self.csv_copy,
                   mode='overwrite')
    # Always run the converter's tearDown, even when the write, the
    # read-back or the assertion fails; otherwise a failing test skips it.
    # NOTE(review): presumably tearDown releases Spark resources -- confirm.
    try:
        ct.write()
        df_in = ct.df
        df_out = ct.sqlCtx.read.csv(self.csv_copy, header=True)
        self.assertTrue(self.assertRDDEquals(df_in.rdd, df_out.rdd))
    finally:
        ct.tearDown()
def test_csv_to_parquet_overwrite(self):
    """Check a CSV -> parquet conversion in ``overwrite`` mode.

    The CSV fixture named by ``self.kwargs['csv']`` is copied to
    ``self.csv_copy``, converted to ``self.parquet_copy``, and the output
    is loaded back using the converter's ``out_format`` so its RDD can be
    compared with the RDD of the converter's input DataFrame.
    """
    copyfile(self.kwargs.get('csv'), self.csv_copy)
    ct = Converter(input=self.csv_copy, output=self.parquet_copy,
                   mode='overwrite')
    # Always run the converter's tearDown, even when the write, the
    # read-back or the assertion fails; otherwise a failing test skips it.
    # NOTE(review): presumably tearDown releases Spark resources -- confirm.
    try:
        ct.write()
        df_in = ct.df
        df_out = ct.sqlCtx.read.format(ct.out_format).load(self.parquet_copy)
        self.assertTrue(self.assertRDDEquals(df_in.rdd, df_out.rdd))
    finally:
        ct.tearDown()