Ejemplo n.º 1
0
def main(*args, **kwargs):
    """Run one conversion end to end and report whether it validated.

    Builds a ``Converter`` from the keyword arguments, prints the results of
    its ``head()`` and ``take(10)`` calls, calls ``write()``, and finally
    checks ``validate()``.

    Args:
        *args: Accepted but not used.
        **kwargs: Forwarded unchanged to ``Converter``.

    Raises:
        ValueError: If ``validate()`` returns a falsy value after the write.
    """
    converter = Converter(**kwargs)

    # Print the results of head() and take(10) before writing anything.
    first_rows = converter.head()
    print(first_rows)
    sample_rows = converter.take(10)
    print(sample_rows)

    converter.write()

    # Guard clause: fail loudly when the written output does not validate.
    if not converter.validate():
        raise ValueError("Convert faild!")
    print("convert successed!")
    def test_parquet_to_csv_overwrite(self):
        """Convert a copied parquet input to CSV in ``overwrite`` mode and compare.

        Copies the tree at ``self.kwargs['parquet']`` to ``self.parquet_copy``,
        runs a ``Converter`` from that copy to ``self.csv_copy``, reads the CSV
        back with ``header=True``, and passes the input and output RDDs to
        ``assertRDDEquals``.
        """
        copytree(self.kwargs.get('parquet'), self.parquet_copy)

        ct = Converter(input=self.parquet_copy,
                       output=self.csv_copy,
                       mode='overwrite')
        try:
            ct.write()
            df_in = ct.df
            df_out = ct.sqlCtx.read.csv(self.csv_copy, header=True)
            self.assertTrue(self.assertRDDEquals(df_in.rdd, df_out.rdd))
        finally:
            # Always run the converter's teardown, even when the write, the
            # read-back or the assertion fails; otherwise a failing test
            # skips it entirely.
            # NOTE(review): presumably this releases the Spark context held
            # by the converter -- confirm against Converter.tearDown.
            ct.tearDown()
    def test_csv_to_parquet_overwrite(self):
        """Convert a copied CSV input to parquet in ``overwrite`` mode and compare.

        Copies the file at ``self.kwargs['csv']`` to ``self.csv_copy``, runs a
        ``Converter`` from that copy to ``self.parquet_copy``, loads the output
        back using the converter's ``out_format``, and passes the input and
        output RDDs to ``assertRDDEquals``.
        """
        copyfile(self.kwargs.get('csv'), self.csv_copy)

        ct = Converter(input=self.csv_copy,
                       output=self.parquet_copy,
                       mode='overwrite')
        try:
            ct.write()
            df_in = ct.df
            df_out = ct.sqlCtx.read.format(ct.out_format).load(self.parquet_copy)
            self.assertTrue(self.assertRDDEquals(df_in.rdd, df_out.rdd))
        finally:
            # Always run the converter's teardown, even when the write, the
            # read-back or the assertion fails; otherwise a failing test
            # skips it entirely.
            # NOTE(review): presumably this releases the Spark context held
            # by the converter -- confirm against Converter.tearDown.
            ct.tearDown()