Example #1
0
 def test_column_order(self):
   # Select the columns in a deliberately non-alphabetical order: the frame
   # read back must keep this order rather than come back sorted by name.
   path = 'weather-ordered.avro'
   reordered = self.df[['temp', 'station', 'time']]
   write_dataframe(self.client, path, reordered)
   roundtripped = read_dataframe(self.client, path)
   assert_frame_equal(roundtripped, reordered)
Example #2
0
#!/usr/bin/env python
# encoding: utf-8

"""Dataframe extension example."""

from hdfs import Config
from hdfs.ext.dataframe import read_dataframe, write_dataframe
import pandas as pd


# Get the default alias' client.
client = Config().get_client()

# A sample dataframe: two records, columns 'A' and 'B'.
df = pd.DataFrame.from_records([
  {'A': 1, 'B': 2},
  {'A': 11, 'B': 23}
])

# Write dataframe to HDFS using Avro serialization, replacing any existing
# 'data.avro' (overwrite=True) so the example can be re-run.
write_dataframe(client, 'data.avro', df, overwrite=True)

# Read the Avro file back from HDFS.
_df = read_dataframe(client, 'data.avro')

# The frames match! `pd.testing` is the public home of `assert_frame_equal`;
# the former `pd.util.testing` path was deprecated in pandas 1.0 and removed
# in pandas 2.0.
pd.testing.assert_frame_equal(df, _df)
Example #3
0
 def test_write(self):
   # Serialize the fixture frame, then read the file back record by record
   # and compare against the fixture records.
   path = 'weather.avro'
   write_dataframe(self.client, path, self.df)
   with AvroReader(self.client, path) as avro_reader:
     written = list(avro_reader)
     eq_(written, self.records)
 def test_column_order(self):
     # The columns are picked in a non-alphabetical order on purpose; the
     # frame read back has to preserve that order, not sort the names.
     target = 'weather-ordered.avro'
     expected = self.df[['temp', 'station', 'time']]
     write_dataframe(self.client, target, expected)
     actual = read_dataframe(self.client, target)
     assert_frame_equal(actual, expected)
 def test_write(self):
     # Write the fixture frame as Avro, then check that the records stored
     # in the file equal the fixture records.
     target = 'weather.avro'
     write_dataframe(self.client, target, self.df)
     with AvroReader(self.client, target) as avro_reader:
         stored = list(avro_reader)
         eq_(stored, self.records)
Example #6
0
#!/usr/bin/env python
# encoding: utf-8
"""Dataframe extension example."""

from hdfs import Config
from hdfs.ext.dataframe import read_dataframe, write_dataframe
import pandas as pd

# Get the default alias' client.
client = Config().get_client()

# A sample dataframe: two records, columns 'A' and 'B'.
df = pd.DataFrame.from_records([{'A': 1, 'B': 2}, {'A': 11, 'B': 23}])

# Write dataframe to HDFS using Avro serialization, replacing any existing
# 'data.avro' (overwrite=True) so the example can be re-run.
write_dataframe(client, 'data.avro', df, overwrite=True)

# Read the Avro file back from HDFS.
_df = read_dataframe(client, 'data.avro')

# The frames match! `pd.testing` is the public home of `assert_frame_equal`;
# the former `pd.util.testing` path was deprecated in pandas 1.0 and removed
# in pandas 2.0.
pd.testing.assert_frame_equal(df, _df)