Example #1
def test_session_create_data_frame_from_list(self):
    df = self.spark.createDataFrame([(1, 'one'), (2, 'two'), (3, 'three')])
    self.assertEqual(df.count(), 3)
    self.assertListEqual(
        df.collect(),
        [Row(_1=1, _2='one'), Row(_1=2, _2='two'), Row(_1=3, _2='three')],
    )
    self.assertEqual(
        df.schema,
        StructType([StructField('_1', LongType(), True), StructField('_2', StringType(), True)]),
    )
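
As the assertions show, tuples without a schema get auto-generated column names _1, _2, and so on. A minimal sketch of the alternative, written against PySpark's public API (the `spark` session here is built locally and is not part of the example above):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
# explicit names replace the auto-generated _1/_2 column names
df = spark.createDataFrame([(1, 'one'), (2, 'two'), (3, 'three')], ['id', 'name'])
assert df.columns == ['id', 'name']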
Example #2

def test_cast_row_to_string(self):
    self.assertEqual(
        cast_to_string(
            Row(
                a=collections.OrderedDict([('value', None), ('b', {'c': 7})]),
                b=None,
                c=True,
                d=5.2,
            ),
            StructType([
                StructField(
                    'a',
                    MapType(
                        StringType(),
                        MapType(StringType(), LongType(), True),
                        True,
                    ),
                    True,
                ),
                StructField('b', LongType(), True),
                StructField('c', BooleanType(), True),
                StructField('d', DoubleType(), True),
            ]),
            options=BASE_OPTIONS,
        ),
        '[[value ->, b -> [c -> 7]],, true, 5.2]',
    )
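
The expected string shows Spark's cast-to-string conventions: booleans print lowercase, maps print as [key -> value], nulls print as nothing, and struct fields are comma-separated. A rough pure-Python sketch of those rules (the `render`/`render_struct` helpers are illustrative only, not pysparkling's actual implementation; dict insertion order stands in for the OrderedDict):

def render(value):
    # booleans print lowercase, as in Spark's string cast
    if isinstance(value, bool):
        return 'true' if value else 'false'
    # maps print as [k1 -> v1, k2 -> v2]; a null value leaves nothing after '->'
    if isinstance(value, dict):
        entries = ('{} ->{}'.format(k, '' if v is None else ' ' + render(v))
                   for k, v in value.items())
        return '[{}]'.format(', '.join(entries))
    return str(value)

def render_struct(values):
    # fields are joined by ','; non-null fields after the first get a leading space
    out = ''
    for i, v in enumerate(values):
        if i:
            out += ','
        if v is not None:
            out += (' ' if i else '') + render(v)
    return '[' + out + ']'

assert render_struct([{'value': None, 'b': {'c': 7}}, None, True, 5.2]) \
    == '[[value ->, b -> [c -> 7]],, true, 5.2]'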
Example #3
def test_session_create_data_frame_from_pandas_data_frame(self):
    try:
        # Pandas is an optional dependency
        # pylint: disable=import-outside-toplevel
        import pandas as pd
    except ImportError:
        raise Exception('pandas is not importable')

    pdf = pd.DataFrame([(1, 'one'), (2, 'two'), (3, 'three')])

    df = self.spark.createDataFrame(pdf)

    self.assertEqual(df.count(), 3)
    self.assertListEqual(
        df.collect(),
        [Row(**{'0': 1, '1': 'one'}), Row(**{'0': 2, '1': 'two'}), Row(**{'0': 3, '1': 'three'})],
    )
    self.assertEqual(
        df.schema,
        StructType([StructField('0', LongType(), True), StructField('1', StringType(), True)]),
    )
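
The default integer pandas column labels 0 and 1 become the string column names '0' and '1'. A short sketch of the same conversion with named pandas columns, again using a locally built PySpark session (not part of the example above):

import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
pdf = pd.DataFrame([(1, 'one'), (2, 'two')], columns=['id', 'name'])
# pandas column labels carry over as Spark column names
df = spark.createDataFrame(pdf)
assert df.columns == ['id', 'name']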
Example #4

def test_cast_to_struct(self):
    self.assertEqual(
        cast_to_struct(
            Row(character='Alice', day='28', month='8', year='2019'),
            from_type=StructType(fields=[
                StructField('character', StringType()),
                StructField('day', StringType()),
                StructField('month', StringType()),
                StructField('year', StringType()),
            ]),
            to_type=StructType(fields=[
                StructField('character', StringType()),
                StructField('day', IntegerType()),
                StructField('month', IntegerType()),
                StructField('year', IntegerType()),
            ]),
            options=BASE_OPTIONS,
        ),
        Row(character='Alice', day=28, month=8, year=2019),
    )
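
cast_to_struct pairs up the from_type and to_type fields and casts each value individually. A simplified pure-Python sketch of that per-field walk (the `CASTERS` table and `cast_struct` helper are hypothetical stand-ins for pysparkling's real cast dispatch):

# Hypothetical mini-version of a struct cast: apply a per-type caster to each
# field value, pairing from_type and to_type fields by position.
CASTERS = {'int': int, 'string': str}

def cast_struct(values, from_types, to_types):
    return [
        value if f == t else CASTERS[t](value)
        for value, f, t in zip(values, from_types, to_types)
    ]

assert cast_struct(['Alice', '28', '8', '2019'],
                   ['string', 'string', 'string', 'string'],
                   ['string', 'int', 'int', 'int']) == ['Alice', 28, 8, 2019]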
Example #5
def create_counts_row(col1Item, rows):
    # column_size, distinct_col2 and clean_element come from the enclosing scope
    counts_row = [None] * (column_size + 1)

    def parse_row(row):
        column_index = distinct_col2[clean_element(row[1])]
        counts_row[int(column_index + 1)] = int(row[2])

    rows.foreach(parse_row)
    # the value of col1 is the first value, the rest are the counts
    counts_row[0] = clean_element(col1Item)
    return Row(counts_row)
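
This helper builds one output row of a crosstab-style count table: index 0 holds the col1 value and indices 1..column_size hold the per-col2 counts. A tiny sketch of that layout with plain Python in place of the RDD machinery (the literal values here are made up for illustration):

# distinct_col2 maps each col2 value to its 0-based column index
distinct_col2 = {'x': 0, 'y': 1}
counts_row = [None] * (len(distinct_col2) + 1)
counts_row[0] = 'a'                      # the col1 value
counts_row[distinct_col2['x'] + 1] = 3   # count for (a, x)
counts_row[distinct_col2['y'] + 1] = 1   # count for (a, y)
assert counts_row == ['a', 3, 1]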
Example #6

def test_column_stat_helper():
    """
    Expected quantile values come from use of org.apache.spark.sql.catalyst.util.QuantileSummaries
    """
    schema = StructType([StructField('value', IntegerType())])
    helper = ColumnStatHelper(col('value'))
    for i in range(1, 100001):
        helper.merge(Row(value=i), schema)
    helper.finalize()
    assert helper.count == 100000
    assert helper.min == 1
    assert helper.max == 100000
    assert helper.mean == 50000.5
    assert helper.stddev == 28867.65779668774  # sample standard deviation
    assert helper.get_quantile(0) == 1
    assert helper.get_quantile(0.25) == 24998
    assert helper.get_quantile(0.5) == 50000
    assert helper.get_quantile(0.75) == 74993
    assert helper.get_quantile(1) == 100000
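
The asserted standard deviation can be checked independently: the sample standard deviation is sqrt(sum((x - mean)^2) / (n - 1)), which Python's statistics module computes directly:

import statistics

# independent check of the asserted sample standard deviation of 1..100000
assert abs(statistics.stdev(range(1, 100001)) - 28867.65779668774) < 1e-6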
Example #7
def test_session_range(self):
    df = self.spark.range(3)
    self.assertEqual(df.count(), 3)
    self.assertListEqual(df.collect(), [Row(id=0), Row(id=1), Row(id=2)])
    self.assertEqual(list(df.toLocalIterator()), [Row(id=0), Row(id=1), Row(id=2)])
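
range also accepts start, end and step arguments, mirroring Python's built-in range. A minimal sketch, again against PySpark's public API with a locally built session:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
# start=1, end=7 (exclusive), step=2
df = spark.range(1, 7, 2)
assert [r.id for r in df.collect()] == [1, 3, 5]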
Example #8
def test_session_create_data_frame_from_list_with_schema(self):
    schema = StructType([StructField('map', MapType(StringType(), IntegerType()), True)])
    df = self.spark.createDataFrame([({'a': 1},)], schema=schema)
    self.assertEqual(df.count(), 1)
    self.assertListEqual(df.collect(), [Row(map={'a': 1})])
    self.assertEqual(df.schema, schema)
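
PySpark's createDataFrame also accepts the schema as a DDL-formatted string; whether pysparkling mirrors this is not shown by the example above, so treat this as a PySpark-side sketch:

from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
# DDL-string equivalent of the StructType schema above
df = spark.createDataFrame([({'a': 1},)], schema='map map<string,int>')
assert df.collect() == [Row(map={'a': 1})]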