예제 #1
0
 def sqlType(cls):
     """Return the Spark SQL struct layout used to store this type.

     The struct has four fields:

     * ``type``    -- byte, not nullable
     * ``size``    -- integer, nullable
     * ``indices`` -- array of integers, nullable
     * ``values``  -- array of doubles, nullable

     Both arrays are declared with ``containsNull=False``, i.e. the arrays
     themselves may be null but their elements may not.
     """
     int_array = ArrayType(IntegerType(), False)
     double_array = ArrayType(DoubleType(), False)
     layout = [
         StructField("type", ByteType(), False),
         StructField("size", IntegerType(), True),
         StructField("indices", int_array, True),
         StructField("values", double_array, True),
     ]
     return StructType(layout)
예제 #2
0
    FloatType: datashape.float32,
    DoubleType: datashape.float64,
    StringType: datashape.string,
    BinaryType: datashape.bytes_,
    BooleanType: datashape.bool_,
    TimestampType: datashape.datetime_,
    DateType: datashape.date_,
    # sql.ArrayType: ?,
    # sql.MapType: ?,
    # sql.StructType: ?
}

# Lookup table from a datashape measure to the Spark SQL type *instance*
# that represents it.  The mapping is many-to-one: float64/real both map to
# DoubleType and time_/datetime_ both map to TimestampType.
dshape_to_sparksql = dict([
    # integers
    (datashape.int16, ShortType()),
    (datashape.int32, IntegerType()),
    (datashape.int64, LongType()),
    # floating point
    (datashape.float32, FloatType()),
    (datashape.float64, DoubleType()),
    (datashape.real, DoubleType()),
    # temporal
    (datashape.time_, TimestampType()),
    (datashape.date_, DateType()),
    (datashape.datetime_, TimestampType()),
    # everything else
    (datashape.bool_, BooleanType()),
    (datashape.string, StringType()),
])

# Register the two Spark SQL container classes in ooc_types
# (presumably the registry of "out-of-core" types -- confirm where
# ooc_types is defined).
ooc_types |= {SparkDataFrame, SchemaRDD}

# Rebind both context classes to their memoize-wrapped versions.
# NOTE(review): presumably so that repeated construction with the same
# arguments reuses one context object -- confirm against memoize's definition.
SQLContext = memoize(SQLContext)
HiveContext = memoize(HiveContext)
예제 #3
0
# Single-row RDD; the 14 tuple positions line up, in order, with the 14
# columns of the `schema` defined right after this statement.
# NOTE: `long` is a Python 2 builtin, so this snippet targets Python 2.
source = sc.parallelize([
    (
        int("127"),                        # byte
        int("32767"),                      # short
        int("2147483647"),                 # int
        long("9223372036854775807"),       # long
        float("1.1"),                      # float
        float("2.2"),                      # double
        Decimal("3.3"),                    # decimal
        "str",                             # string
        bool(0),                           # boolean (False)
        datetime(2015, 9, 22, 9, 39, 45),  # timestamp
        date(2015, 9, 22),                 # date
        [1, 2, 3],                         # array
        {"key": "value"},                  # col_map
        (1, 2.0, "3.0"),                   # struct
    ),
])

# Schema with fourteen columns, every one of them (and every member of the
# nested struct) declared non-nullable.  Fields are generated from
# (name, type) pairs because the nullable flag is the same everywhere.
# Generator expressions are used instead of list comprehensions so the loop
# variables never leak into module scope on Python 2.
schema = StructType(list(
    StructField(column, spark_type, False)
    for column, spark_type in (
        ("byte", ByteType()),
        ("short", ShortType()),
        ("int", IntegerType()),
        ("long", LongType()),
        ("float", FloatType()),
        ("double", DoubleType()),
        ("decimal", DecimalType()),
        ("string", StringType()),
        ("boolean", BooleanType()),
        ("timestamp", TimestampType()),
        ("date", DateType()),
        # array elements and map values are declared non-null as well
        ("array", ArrayType(IntegerType(), False)),
        ("col_map", MapType(StringType(), StringType(), False)),
        ("struct", StructType(list(
            StructField(member, member_type, False)
            for member, member_type in (
                ("first", IntegerType()),
                ("second", FloatType()),
                ("third", StringType()),
            )
        ))),
    )
))