def get(self) -> pd.DataFrame:
    """Download this dataset version's data and return it as a DataFrame.

    A falsy namespace falls back to ``'_'`` and a falsy version falls back
    to ``'latest'``. The response body is decoded with
    ``pandavro.from_avro``.
    """
    namespace = self.__namespace or '_'
    dataset = self.__dataset
    version = self.__version or 'latest'
    # join raises TypeError if any component is not a string.
    path = '/'.join(
        ('/datasets', namespace, dataset, 'versions', version, 'data')
    )
    payload = client.get(path).content
    return pandavro.from_avro(BytesIO(payload))
def test_buffer_e2e(dataframe):
    """Write a frame to an Avro file and read it back through a BytesIO buffer.

    The frame read from the in-memory buffer must equal the ``dataframe``
    fixture that was written.
    """
    # The context manager closes (and, with the default delete=True, removes)
    # the temporary file even when the assertion below fails.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        # The inner ``with`` already closes the handle; no explicit close().
        with open(tf.name, 'rb') as f:
            expect = pdx.from_avro(BytesIO(f.read()))
        assert_frame_equal(expect, dataframe)
def test_delegation(dataframe):
    """Round-trip a frame through an Avro file addressed by path.

    The ``DateTime64`` column of the frame read back is cast to
    ``datetime64[ns]`` before it is compared with the ``dataframe`` fixture.
    """
    # The context manager closes (and, with the default delete=True, removes)
    # the temporary file even when the assertion below fails.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        expect = pdx.from_avro(tf.name)
        expect['DateTime64'] = expect['DateTime64'].astype(
            np.dtype('datetime64[ns]'))
        assert_frame_equal(expect, dataframe)
def test_append(dataframe):
    """Write a frame in two halves (second with ``append=True``) and read it back.

    The ``DateTime64`` column of the frame read back is cast to
    ``datetime64[ns]`` before it is compared with the full ``dataframe``
    fixture.
    """
    # Split point between the first write and the appended write.
    half = dataframe.shape[0] // 2
    # The context manager closes (and, with the default delete=True, removes)
    # the temporary file even when the assertion below fails.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe[0:half])
        pdx.to_avro(tf.name, dataframe[half:], append=True)
        expect = pdx.from_avro(tf.name)
        expect['DateTime64'] = expect['DateTime64'].astype(
            np.dtype('datetime64[ns]'))
        assert_frame_equal(expect, dataframe)
def _create_tfrecord(file):
    """Convert ``TrainTemp/<file>`` (Avro) into a TFRecord file under ``TrainData/``.

    Every row becomes one ``tf.train.Example`` with three int64 features:

    * ``index`` - for each ``(col, idx)`` pair in ``COLUMNS``, the value
      ``idx[row[col]]``;
    * ``value`` - a one for every entry of ``index``;
    * ``label`` - the row's ``click`` field.

    Args:
        file: Name of the Avro file, relative to ``TrainTemp/``.
    """
    data = pdx.from_avro("TrainTemp/" + file)

    # Swap only the trailing "avro" so that an "avro" occurring elsewhere in
    # the name (e.g. "avro_part.avro") is left intact. Names that do not end
    # in "avro" keep the plain substring replacement.
    if file.endswith("avro"):
        fname = file[:-len("avro")] + "tfrecords"
    else:
        fname = file.replace("avro", "tfrecords")

    with tf.python_io.TFRecordWriter("TrainData/" + fname) as writer:
        for _, row in data.iterrows():
            index = [idx[row[col]] for col, idx in COLUMNS.items()]
            value = np.full(len(index), 1)
            label = row.click

            example = tf.train.Example()
            features = example.features.feature
            features["index"].int64_list.value.extend(index)
            features["value"].int64_list.value.extend(value)
            features["label"].int64_list.value.append(label)
            writer.write(example.SerializeToString())
def test_delegation(dataframe):
    """Round-trip a frame through an Avro file addressed by path.

    The frame read back must equal the ``dataframe`` fixture.
    """
    # The context manager closes (and, with the default delete=True, removes)
    # the temporary file even when the assertion below fails.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        expect = pdx.from_avro(tf.name)
        assert_frame_equal(expect, dataframe)
def test_file_path_e2e(dataframe):
    """Write a frame to an Avro file by path and read it back by path.

    The frame read back must equal the ``dataframe`` fixture.
    """
    # The context manager closes (and, with the default delete=True, removes)
    # the temporary file even when the assertion below fails.
    with NamedTemporaryFile() as tf:
        pdx.to_avro(tf.name, dataframe)
        expect = pdx.from_avro(tf.name)
        assert_frame_equal(expect, dataframe)
def main():
    """Load ``weather.avro``, print the frame, and write it to ``weather_out.avro``."""
    source_path, target_path = 'weather.avro', 'weather_out.avro'
    frame = pdx.from_avro(source_path)
    print(frame)
    pdx.to_avro(target_path, frame)
def _load_avro(dirs):
    """Open ``dirs`` in binary mode through ``gcsio.GcsIO`` and parse it as Avro.

    Returns whatever ``pdx.from_avro`` produces for the opened stream.
    """
    with gcsio.GcsIO().open(dirs, 'rb') as stream:
        frame = pdx.from_avro(stream)
    return frame
def deserialize_avro_str_to_pandas(avro_str: str, schema: dict = None) -> pd.DataFrame:
    """Decode a base64-encoded Avro payload into a DataFrame.

    Args:
        avro_str: Base64 text whose decoded bytes are Avro data.
        schema: Passed through to ``pandavro.from_avro`` as its second
            positional argument; ``None`` by default.

    Returns:
        The result of ``pandavro.from_avro`` on the decoded bytes.
    """
    raw_bytes = base64.b64decode(avro_str)
    buffer = io.BytesIO(raw_bytes)
    return pandavro.from_avro(buffer, schema)
{ "name": "유재석", "birth": "1972-08-14", "job": "MC, 개그맨" }, { "name": "강호동", "birth": "1970-05-11", "job": "MC, 개그맨" }, { "name": "김구라", "birth": "1970-10-03", "job": "MC, 개그맨" }, ] print(type(member)) print() # DataFrame df1 = pd.DataFrame.from_records(member) print(df1) print() # Avro 쓰기 pandavro.to_avro("./data/csv/member.avro", df1) # Avro 읽기 df2 = pandavro.from_avro("./data/csv/member.avro") print(df2)