def sqlType(cls):
    """Return the SQL struct schema used to store this type.

    The returned ``StructType`` has four fields, in this order:

    * ``type``    -- ``ByteType``, not nullable.
    * ``size``    -- ``IntegerType``, nullable.
    * ``indices`` -- array of ``IntegerType``, nullable.
    * ``values``  -- array of ``DoubleType``, nullable.

    NOTE(review): presumably ``type`` discriminates between storage
    variants that use different subsets of the nullable fields --
    confirm against the enclosing class's serialize/deserialize code.
    """
    # Assuming these are pyspark.sql.types classes: the trailing boolean of
    # StructField is ``nullable`` and the second argument of ArrayType is
    # ``containsNull`` (False => the array elements themselves may not be
    # null).
    fields = [
        StructField("type", ByteType(), False),
        StructField("size", IntegerType(), True),
        StructField("indices", ArrayType(IntegerType(), False), True),
        StructField("values", ArrayType(DoubleType(), False), True),
    ]
    return StructType(fields)
FloatType: datashape.float32, DoubleType: datashape.float64, StringType: datashape.string, BinaryType: datashape.bytes_, BooleanType: datashape.bool_, TimestampType: datashape.datetime_, DateType: datashape.date_, # sql.ArrayType: ?, # sql.MapTYpe: ?, # sql.StructType: ? } dshape_to_sparksql = { datashape.int16: ShortType(), datashape.int32: IntegerType(), datashape.int64: LongType(), datashape.float32: FloatType(), datashape.float64: DoubleType(), datashape.real: DoubleType(), datashape.time_: TimestampType(), datashape.date_: DateType(), datashape.datetime_: TimestampType(), datashape.bool_: BooleanType(), datashape.string: StringType() } ooc_types |= set([SparkDataFrame, SchemaRDD]) SQLContext = memoize(SQLContext) HiveContext = memoize(HiveContext)
# One sample row holding a value for every column declared in ``schema``
# below; the tuple positions line up one-to-one with the schema fields.
# NOTE(review): ``long`` only exists on Python 2 -- confirm the target
# interpreter before porting this snippet.
sample_row = (
    int("127"),                          # byte
    int("32767"),                        # short
    int("2147483647"),                   # int
    long("9223372036854775807"),         # long
    float("1.1"),                        # float
    float("2.2"),                        # double
    Decimal("3.3"),                      # decimal
    "str",                               # string
    bool(0),                             # boolean
    datetime(2015, 9, 22, 9, 39, 45),    # timestamp
    date(2015, 9, 22),                   # date
    [1, 2, 3],                           # array
    {"key": "value"},                    # col_map
    (1, 2.0, "3.0"),                     # struct
)
source = sc.parallelize([sample_row])

# Type of the nested "struct" column: three non-nullable members.
struct_column_type = StructType([
    StructField("first", IntegerType(), False),
    StructField("second", FloatType(), False),
    StructField("third", StringType(), False),
])

# Every column is declared with its trailing flag set to False
# (non-nullable, assuming pyspark's StructField signature).
schema = StructType([
    StructField("byte", ByteType(), False),
    StructField("short", ShortType(), False),
    StructField("int", IntegerType(), False),
    StructField("long", LongType(), False),
    StructField("float", FloatType(), False),
    StructField("double", DoubleType(), False),
    StructField("decimal", DecimalType(), False),
    StructField("string", StringType(), False),
    StructField("boolean", BooleanType(), False),
    StructField("timestamp", TimestampType(), False),
    StructField("date", DateType(), False),
    StructField("array", ArrayType(IntegerType(), False), False),
    StructField("col_map", MapType(StringType(), StringType(), False), False),
    StructField("struct", struct_column_type, False),
])