Python SchemaUtils 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: src.catalog.schema_utils

클래스/타입: SchemaUtils

hotexamples.com에서의 예제들: 5

Python SchemaUtils - 5개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python src.catalog.schema_utils.SchemaUtils의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

get_petastorm_column(2)

get_petastorm_schema(2)

petastorm_type_cast(1)

예제 #1

파일 보기

 def test_get_petastorm_schema(self, mock_get_pc, mock_uni):
     """Check that get_petastorm_schema converts each column exactly once.

     ``mock_get_pc`` and ``mock_uni`` are injected mocks. Judging by the
     assertions below they stand in for the per-column conversion and the
     schema constructor; the patch decorators are outside this view, so
     confirm against the decorators on this method.
     """
     columns = [MagicMock(), MagicMock()]
     # Successive calls to the column mock return 1, then 2.
     mock_get_pc.side_effect = [1, 2]

     result = SchemaUtils.get_petastorm_schema('name', columns)

     self.assertEqual(result, mock_uni.return_value)
     expected_calls = [call(column) for column in columns]
     mock_get_pc.assert_has_calls(expected_calls)
     mock_uni.assert_called_once_with('name', [1, 2])

예제 #2

파일 보기

    def test_get_petastorm_column(self):
        """Check the petastorm field produced for scalar column types.

        Each scalar case pairs a ``ColumnType`` with the numpy dtype and the
        Spark type expected inside the resulting ``ScalarCodec``. A column
        whose type is ``None`` is expected to yield ``None``.
        """
        col_name = 'frame_id'
        # (column type, flag passed to both constructors, numpy dtype,
        #  Spark type class wrapped by the codec)
        scalar_cases = [
            (ColumnType.INTEGER, False, np.int32, IntegerType),
            (ColumnType.FLOAT, True, np.float64, FloatType),
            (ColumnType.TEXT, False, np.str_, StringType),
        ]
        for col_type, nullable, np_type, spark_type in scalar_cases:
            col = DataFrameColumn(col_name, col_type, nullable)
            expected = UnischemaField(col_name, np_type, (),
                                      ScalarCodec(spark_type()), nullable)
            self.assertEqual(SchemaUtils.get_petastorm_column(col), expected)

        # A column without a type has no petastorm counterpart.
        untyped_col = DataFrameColumn(col_name, None, True, [10, 10])
        self.assertEqual(SchemaUtils.get_petastorm_column(untyped_col), None)

예제 #3

파일 보기

 def test_get_petastorm_column_ndarray(self):
     """Check the petastorm field produced for NDARRAY columns.

     Iterates ``NdArrayType`` in lockstep with ``expected_type`` and
     asserts that each column converts to a ``UnischemaField`` carrying
     the paired numpy type, a ``[10, 10]`` shape and an ``NdarrayCodec``.
     """
     # np.str_ is used where np.unicode_ used to be: that alias was removed
     # in NumPy 2.0, and on NumPy 1.x both names are the same type.
     expected_type = [
         np.int8, np.uint8, np.int16, np.int32, np.int64, np.str_,
         np.bool_, np.float32, np.float64, Decimal, np.str_, np.datetime64
     ]
     col_name = 'frame_id'
     # zip stops at the shorter input, so any NdArrayType entry beyond the
     # end of expected_type is not exercised here.
     for array_type, np_type in zip(NdArrayType, expected_type):
         col = DataFrameColumn(col_name, ColumnType.NDARRAY, True,
                               array_type, [10, 10])
         petastorm_col = UnischemaField(col_name, np_type, [10, 10],
                                        NdarrayCodec(), True)
         self.assertEqual(SchemaUtils.get_petastorm_column(col),
                          petastorm_col)

예제 #4

파일 보기

 def test_df_schema(self):
     """Check DataFrameSchema against SchemaUtils.get_petastorm_schema.

     Builds a three-column schema and verifies its name, column list,
     petastorm fields and pyspark schema against the values derived
     directly from ``SchemaUtils`` for the same inputs.
     """
     name = "foo"
     columns = [
         DataFrameColumn("frame_id", ColumnType.INTEGER, False),
         DataFrameColumn("frame_data", ColumnType.NDARRAY, False,
                         [28, 28]),
         DataFrameColumn("frame_label", ColumnType.INTEGER, False),
     ]
     actual = DataFrameSchema(name, columns)
     reference = SchemaUtils.get_petastorm_schema(name, columns)

     self.assertEqual(actual.name, name)
     self.assertEqual(actual.column_list, columns)

     # Compare the field collections as a whole, then entry by entry.
     self.assertEqual(actual.petastorm_schema.fields, reference.fields)
     field_pairs = zip(actual.petastorm_schema.fields, reference.fields)
     for actual_field, reference_field in field_pairs:
         self.assertEqual(actual_field, reference_field)

     self.assertEqual(actual.pyspark_schema, reference.as_spark_schema())

예제 #5

파일 보기

    def exec(self):
        """
        Build one batch from the node's values and write it to storage.

        Each entry of ``self.node.value_list`` is evaluated and its frame
        column is renamed to the paired entry of ``self.node.column_list``.
        The per-column results are merged column-wise, cast against the
        table's petastorm schema and handed to ``StorageEngine.write``.
        Right now we assume there are no missing values.
        """
        table_id = self.node.video_id

        column_batches = []
        pairs = zip(self.node.column_list, self.node.value_list)
        for column, value_expr in pairs:
            evaluated = value_expr.evaluate()
            # Label the evaluated data with the target column's name.
            evaluated.frames.columns = [column.col_name]
            column_batches.append(evaluated)

        merged = Batch.merge_column_wise(column_batches)
        table_metadata = CatalogManager().get_metadata(table_id)

        # verify value types are consistent
        petastorm_schema = table_metadata.schema.petastorm_schema
        merged.frames = SchemaUtils.petastorm_type_cast(petastorm_schema,
                                                        merged.frames)
        StorageEngine.write(table_metadata, merged)