Example #1
0
 def test_column_order(self):
   # Round-trip a frame whose columns are deliberately not in alphabetical
   # order; the frame read back must keep the order that was written.
   path = 'weather-ordered.avro'
   expected = self.df[['temp', 'station', 'time']]
   write_dataframe(self.client, path, expected)
   actual = read_dataframe(self.client, path)
   assert_frame_equal(actual, expected)
Example #2
0
#!/usr/bin/env python
# encoding: utf-8

"""Dataframe extension example."""

from hdfs import Config
from hdfs.ext.dataframe import read_dataframe, write_dataframe
import pandas as pd


# Get the default alias' client.
client = Config().get_client()

# A sample dataframe.
df = pd.DataFrame.from_records([
  {'A': 1, 'B': 2},
  {'A': 11, 'B': 23}
])

# Write dataframe to HDFS using Avro serialization.
write_dataframe(client, 'data.avro', df, overwrite=True)

# Read the Avro file back from HDFS.
_df = read_dataframe(client, 'data.avro')

# The frames match!
# Use the public `pd.testing` namespace: `pd.util.testing` was deprecated in
# pandas 1.0 and removed in pandas 2.0, so the old spelling fails on current
# pandas releases.
pd.testing.assert_frame_equal(df, _df)
Example #3
0
 def test_read(self):
   # Upload the `weather.avro` file found under `self.dpath`, then check that
   # reading it back through the client yields the reference frame `self.df`.
   fixture_path = osp.join(self.dpath, 'weather.avro')
   self.client.upload('weather.avro', fixture_path)
   loaded = read_dataframe(self.client, 'weather.avro')
   assert_frame_equal(loaded, self.df)
 def test_column_order(self):
     # The written column order (temp, station, time) is intentionally not
     # alphabetical; the frame read back has to preserve it as-is.
     target = 'weather-ordered.avro'
     reordered = self.df[['temp', 'station', 'time']]
     write_dataframe(self.client, target, reordered)
     round_tripped = read_dataframe(self.client, target)
     assert_frame_equal(round_tripped, reordered)
 def test_read(self):
     # Push the `weather.avro` file from `self.dpath` through the client, then
     # verify the frame read back equals the reference frame `self.df`.
     source_path = osp.join(self.dpath, 'weather.avro')
     self.client.upload('weather.avro', source_path)
     result = read_dataframe(self.client, 'weather.avro')
     assert_frame_equal(result, self.df)
Example #6
0
#!/usr/bin/env python
# encoding: utf-8
"""Dataframe extension example."""

from hdfs import Config
from hdfs.ext.dataframe import read_dataframe, write_dataframe
import pandas as pd

# Get the default alias' client.
client = Config().get_client()

# A sample dataframe.
df = pd.DataFrame.from_records([{'A': 1, 'B': 2}, {'A': 11, 'B': 23}])

# Write dataframe to HDFS using Avro serialization.
write_dataframe(client, 'data.avro', df, overwrite=True)

# Read the Avro file back from HDFS.
_df = read_dataframe(client, 'data.avro')

# The frames match!
# Use the public `pd.testing` namespace: `pd.util.testing` was deprecated in
# pandas 1.0 and removed in pandas 2.0, so the old spelling fails on current
# pandas releases.
pd.testing.assert_frame_equal(df, _df)