Beispiel #1
0
def guess_type_from_values_as_string(values, options):
    # Reproduces inferences available in Spark
    # PartitioningUtils.inferPartitionColumnValue()
    # located in org.apache.spark.sql.execution.datasources
    tested_types = (
        IntegerType(),
        LongType(),
        DecimalType(),
        DoubleType(),
        TimestampType(),
        StringType()
    )
    string_type = StringType()
    for tested_type in tested_types:
        type_caster = get_caster(from_type=string_type, to_type=tested_type, options=options)
        try:
            for value in values:
                casted_value = type_caster(value)
                if casted_value is None and value not in ("null", None):
                    raise ValueError
            return tested_type
        except ValueError:
            pass
    # Should never happen
    raise AnalysisException(
        "Unable to find a matching type for some fields, even StringType did not work"
    )
Beispiel #2
0
    def test_session_create_data_frame_from_pandas_data_frame(self):
        try:
            # Pandas is an optional dependency
            # pylint: disable=import-outside-toplevel
            import pandas as pd
        except ImportError as e:
            raise ImportError("pandas is not importable") from e

        pdf = pd.DataFrame([(1, "one"), (2, "two"), (3, "three")])

        df = self.spark.createDataFrame(pdf)

        self.assertEqual(df.count(), 3)
        self.assertListEqual(df.collect(), [
            Row(**{
                "0": 1,
                "1": 'one'
            }),
            Row(**{
                "0": 2,
                "1": 'two'
            }),
            Row(**{
                "0": 3,
                "2": 'three'
            })
        ])
        self.assertEqual(
            df.schema,
            StructType([
                StructField("0", LongType(), True),
                StructField("1", StringType(), True)
            ]))
Beispiel #3
0
 def test_cast_row_to_string(self):
     self.assertEqual(
         cast_to_string(Row(a=collections.OrderedDict([("value", None),
                                                       ("b", {
                                                           "c": 7
                                                       })]),
                            b=None,
                            c=True,
                            d=5.2),
                        StructType([
                            StructField(
                                "a",
                                MapType(
                                    StringType(),
                                    MapType(StringType(), LongType(), True),
                                    True), True),
                            StructField("b", LongType(), True),
                            StructField("c", BooleanType(), True),
                            StructField("d", DoubleType(), True)
                        ]),
                        options=BASE_OPTIONS),
         "[[value ->, b -> [c -> 7]],, true, 5.2]")
Beispiel #4
0
 def test_session_create_data_frame_from_list(self):
     df = self.spark.createDataFrame([
         (1, "one"),
         (2, "two"),
         (3, "three"),
     ])
     self.assertEqual(df.count(), 3)
     self.assertListEqual(
         df.collect(),
         [Row(_1=1, _2='one'),
          Row(_1=2, _2='two'),
          Row(_1=3, _2='three')])
     self.assertEqual(
         df.schema,
         StructType([StructField("_1", LongType(), True), StructField("_2", StringType(), True)])
     )
Beispiel #5
0
 def test_cast_float_to_long(self):
     self.assertEqual(
         cast_to_long(9223372036854775807, LongType(),
                      options=BASE_OPTIONS), 9223372036854775807)
Beispiel #6
0
 def test_cast_float_to_int_with_loop(self):
     self.assertEqual(
         cast_to_int(2147483648, LongType(), options=BASE_OPTIONS),
         -2147483648)
Beispiel #7
0
 def test_cast_float_to_int(self):
     self.assertEqual(
         cast_to_int(2147483647, LongType(), options=BASE_OPTIONS),
         2147483647)