# Demo script exercising basic legacy SQLContext DataFrame operations.
# NOTE(review): requires an active SparkContext ``sc`` and an ``sqlContext``
# already in scope (e.g. a pyspark shell) — neither is created in this file.

# --- DataFrame from a list of row dicts -------------------------------------
d = [
    {'name': 'paul', 'age': 10, 'gender': 'male'},
    {'name': 'alice', 'age': 30, 'gender': None},
]
print(sqlContext.createDataFrame(d).collect())

# --- DataFrame from an RDD of (name, age) tuples ----------------------------
# BUG FIX: ``rdd`` was used below while its definition was commented out and
# the source list ``l`` was never defined, so the script died with NameError.
l = [('paul', 10), ('alice', 30)]
rdd = sc.parallelize(l)
df = sqlContext.createDataFrame(rdd, ['name', 'age'])
# print(df.collect())
# print(df.printSchema())
print(df.head(2))

# --- Register as a temp table and query with SQL ----------------------------
sqlContext.registerDataFrameAsTable(df, "table1")
# NOTE(review): no row named 'bob' exists in the demo data, so this query is
# expected to return an empty result — presumably intentional; confirm.
df2 = sqlContext.sql("SELECT name,age from table1 where name='bob'")
print(df2.collect())
print(sqlContext.tableNames())
sqlContext.dropTempTable("table1")
print(sqlContext.tableNames())

# --- Load a CSV via the spark-csv package and aggregate ---------------------
df = sqlContext.read.format('com.databricks.spark.csv').options(
    header='true', inferschema='true'
).load("/home/harsh/mapping_minds_training/spark/train_u6lujuX_CVtuZ9i.csv")
print(df.groupBy('Gender').agg({'ApplicantIncome': 'mean'}).show())
print(df.head(3))
print(df.printSchema())
print(df.columns)
df.cache()  # cache before count() so later actions reuse the loaded data
print('count-------------------------->', df.count())
#print(df.describe().show())
#print('distinct count------------------------------>',df.distinct().count())
#df.unpersist()
def spark_table_exists(sql_ctx: SQLContext, view: str) -> bool:
    """Return True if ``view`` is among the tables registered on ``sql_ctx``.

    :param sql_ctx: the SQLContext whose registered table names are consulted
    :param view: table/view name to look for
    :return: membership of ``view`` in ``sql_ctx.tableNames()``
    """
    registered = sql_ctx.tableNames()
    return view in registered