# imports
from pyspark import SparkContext
from pyspark.sql import HiveContext

# start Spark and Hive SQL contexts
sc = SparkContext("local", "demo app")
hc = HiveContext(sc)

# list the tables registered in the Hive metastore
print("Printing tables in DB:")
print(hc.tableNames())

# query a sample of rows from one table
print("Printing first 10 rows in zip_neighborhood_borough_xref table.")
sqlQuery = "SELECT * FROM zip_neighborhood_borough_xref LIMIT 10"
hc.sql(sqlQuery).show()
# Walks through the main HiveContext APIs one by one.
from pyspark.sql import HiveContext


def sql_hive_context_example(spark):
    # create a HiveContext from the existing SparkContext
    hive_ctx = HiveContext(spark.sparkContext)

    # createDataFrame
    l = [('Alice', 18), ('Bob', 20), ('Charley', 22)]
    df = hive_ctx.createDataFrame(l, ('name', 'age'))
    print("createDataFrame API finished")

    # registerDataFrameAsTable
    hive_ctx.registerDataFrameAsTable(df, "table1")
    print("registerDataFrameAsTable API finished")

    # sql
    tmp_df = hive_ctx.sql("select * from table1")
    tmp_df.show()
    print("sql API finished")

    # table
    tmp_df = hive_ctx.table("table1")
    tmp_df.show()
    print("table API finished")

    # tableNames
    table_names = hive_ctx.tableNames()
    print(table_names)
    print("tableNames API finished")

    # tables
    tables = hive_ctx.tables()
    print(tables)
    print("tables API finished")

    # range
    tmp_df = hive_ctx.range(1, 10, 2)
    tmp_df.show()
    print("range API finished")

    # dropTempTable
    hive_ctx.dropTempTable("table1")
    table_names = hive_ctx.tableNames()
    print(table_names)
    print("dropTempTable API finished")

    # cacheTable & uncacheTable & clearCache
    df = hive_ctx.range(1, 10, 2)
    hive_ctx.registerDataFrameAsTable(df, "table")
    hive_ctx.cacheTable("table")
    hive_ctx.uncacheTable("table")
    hive_ctx.clearCache()
    print("cacheTable & uncacheTable & clearCache API finished")

    # createExternalTable
    # newSession
    # registerFunction
    #   Deprecated in 2.3.0. Use :func:`spark.udf.register` instead.
    # registerJavaFunction
    #   Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead.

    # setConf & getConf
    hive_ctx.setConf("key1", "value1")
    value = hive_ctx.getConf("key1")
    print(value)
    print("setConf & getConf API finished")

    # refreshTable
    #   Exception: An error occurred while calling o26.refreshTable:
    #   Method refreshTable([class java.lang.String]) does not exist

    print("Finish running HiveContext API")
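# The deprecation notes above point at the SparkSession-based API that replaces
# HiveContext in Spark 2.x. Below is a minimal sketch of the equivalent flow,
# assuming Spark 2.0+ with Hive support on the classpath; the "shout" UDF and
# the table/column names are illustrative only, not part of the examples above.
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("hive context replacement sketch") \
    .enableHiveSupport() \
    .getOrCreate()

# createOrReplaceTempView replaces registerDataFrameAsTable
df = spark.createDataFrame([('Alice', 18), ('Bob', 20)], ('name', 'age'))
df.createOrReplaceTempView("table1")

# spark.udf.register replaces the deprecated registerFunction
spark.udf.register("shout", lambda s: s.upper())
spark.sql("select shout(name) as name, age from table1").show()

# catalog calls replace tableNames / cacheTable / dropTempTable
print([t.name for t in spark.catalog.listTables()])
spark.catalog.cacheTable("table1")
spark.catalog.uncacheTable("table1")
spark.catalog.dropTempView("table1")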
from pyspark import SparkContext
from pyspark.sql import HiveContext
from pyspark.sql.types import *
from udf.pyspark.udfs import *

if __name__ == "__main__":
    sc = SparkContext(appName="SparkSQL:[demo][pysparkdemo]")
    sqlContext = HiveContext(sc)

    # DataFrame is created from the parquet session log and registered as a temp table
    df = sqlContext.read.parquet("/mvad/warehouse/session/dspan/date=2015-09-01/")
    df.registerTempTable("sessionlog")
    for table in sqlContext.tableNames():
        print(table)
    df.printSchema()

    # register the project UDF so it can be used from SQL
    sqlContext.udf.register("toNormalCookie", toNormalCookie)

    sql1 = """select toNormalCookie(cookie) as cookiestr, eventTime, eventType,
              geoInfo.country as country, geoInfo.province as province
              from sessionlog limit 10""".replace('\n', ' ')
    sample = sqlContext.sql(sql1)
    sample.show()

    sql2 = """select eventType, count(cookie) as count
              from sessionlog group by eventType""".replace('\n', ' ')
    result = sqlContext.sql(sql2)
    result.cache()
    # only show 20 records
    result.show()
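# toNormalCookie comes from the project module udf.pyspark.udfs, which is not
# shown here. Judging from the intarr2str lambda in the next example, a
# hypothetical stand-in that turns the integer-array cookie field into a plain
# string might look like this; the real implementation may differ.
def toNormalCookie(cookie_ints):
    # join the array of ints into one string; purely illustrative
    return "".join(str(i) for i in cookie_ints)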
from pyspark import SparkContext
from pyspark.sql import HiveContext
from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerType

if __name__ == "__main__":
    sc = SparkContext(appName="SparkSQL:[com.mvad.spark.demo][pysparkdemo]")
    sqlContext = HiveContext(sc)

    # DataFrame is created from the parquet session log
    # (parquetFile is the older reader; sqlContext.read.parquet is the newer form)
    df = sqlContext.parquetFile("/mvad/warehouse/session/dspan/date=2015-05-01/")
    df.registerTempTable("sessionlog")
    for table in sqlContext.tableNames():
        print(table)
    df.printSchema()

    # register an inline UDF that joins the integer-array cookie into a string
    sqlContext.udf.register("intarr2str", lambda array: "".join(map(str, array)))

    sql1 = """select intarr2str(cookie) as cookiestr, eventTime, eventType,
              geoInfo.country as country, geoInfo.province as province
              from sessionlog limit 10""".replace('\n', ' ')
    sample = sqlContext.sql(sql1)
    sample.show()

    sql2 = """select eventType, count(cookie) as count
              from sessionlog group by eventType""".replace('\n', ' ')
    result = sqlContext.sql(sql2)
    result.cache()
    # only show 20 records
    result.show()
import pyspark.sql.utils

# sqlCtx is assumed to be an existing SQLContext/HiveContext, and table_6 a
# table built in earlier steps of the pipeline.
spark_df = sqlCtx.read.format('com.databricks.spark.csv') \
    .options(header='true', inferSchema='true') \
    .load("./data/clicks_test.csv")
spark_df.registerTempTable("clicks_train")

# drop table_7 if it already exists
try:
    sqlCtx.sql("drop table table_7")
except pyspark.sql.utils.AnalysisException:
    pass
except Exception:
    pass

print("*** CREATING TABLE 7 ***")
# table 7 is train but geo location needs to be coded.
# I don't know how to code it in SQL, so I will just remove it for now.
sqlCtx.sql("create table table_7 as select a.document_id, a.platform, "
           "a.traffic_source, a.display_id, a.source_id, a.publisher_id, "
           "a.category_id, b.ad_id, a.topic_id from table_6 a "
           "inner join clicks_train b on a.display_id = b.display_id")
print("*** FINISHED CREATING TABLE 7 ***")

# create train file from table_7
train_spark_df = sqlCtx.sql("select * from table_7")
train_spark_df.write.csv('./cleaned_data/test_files_from_spark')

# TODO do test file
spark_tables = sqlCtx.tableNames()
print(spark_tables)
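# A small follow-up sketch, assuming Spark 2.x where DataFrameWriter.csv is
# available: write the same output with a header row and collapsed into a
# single part file, which is often easier for downstream tools to consume.
# The output path is illustrative only.
train_spark_df.coalesce(1) \
    .write \
    .option("header", "true") \
    .mode("overwrite") \
    .csv("./cleaned_data/train_single_file")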