####   to smooth the two-dimensional histogram of offers. First, a moving average (MA)  ####
####   is calculated for commissions, partitioned by interest values. In the second     ####
####   step, the MA is calculated over the resulting column, partitioned by commission  ####
####   values.                                                                           ####
##########################################################################################
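
# A minimal sketch (not part of the original pipeline) of the two-pass moving
# average described in the banner above, written as a self-contained helper;
# the DataFrame 'offers' and its histogram-count column 'cnt' are illustrative
# assumptions.
from pyspark.sql import Window
import pyspark.sql.functions as psf


def smooth_offer_histogram(offers, bandwidth=50, cnt_col='cnt'):
    """MA over CommissionPct within each Interest value, then MA over Interest
    within each CommissionPct value."""
    w_int = (Window.partitionBy('Interest')
                   .orderBy('CommissionPct')
                   .rowsBetween(-bandwidth, bandwidth))
    w_com = (Window.partitionBy('CommissionPct')
                   .orderBy('Interest')
                   .rowsBetween(-bandwidth, bandwidth))
    return (offers
            .withColumn('ma1', psf.avg(cnt_col).over(w_int))   # first pass
            .withColumn('ma2', psf.avg('ma1').over(w_com)))    # second pass
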

#from time import *
#optStart = clock()

# Initial parameters
abstol = 0.005  # Precision of price optimization
rnd = 3  # Number of significant digits
#bandwidth = 50      # How many rows in forward and backward to take for moving average

# Create a net of all possible interest rates, rounded to the desired abstol;
# the constant 'key0' is a dummy join key used to form the Cartesian product
NIR = hc.range(int(minNIR / abstol), int((maxNIR + abstol) / abstol), 1, 1)
NIR = NIR.withColumn("Interest", psf.round(NIR.id * abstol, rnd)).withColumn(
    "key", psf.lit("key0")).select('key', 'Interest')

# Create a net of all possible commission percentages, rounded to the desired abstol;
# the constant 'key0' is a dummy join key used to form the Cartesian product
Com = hc.range(int(minCom / abstol), int((maxCom + abstol) / abstol), 1, 1)
Com = Com.withColumn("CommissionPct", psf.round(Com.id * abstol, rnd)).withColumn(
    "key", psf.lit("key0")).select('key', 'CommissionPct')
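
# Sketch of how the dummy key is meant to be used (the join itself is not shown
# in this excerpt; 'offerGrid' is an illustrative name): joining the two nets on
# 'key' yields the Cartesian product of all (Interest, CommissionPct) candidates.
offerGrid = NIR.join(Com, 'key').drop('key')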

########################################################################
### New KDE
kdNIR = KernelDensity()  # density estimator for interest rates
kdCom = KernelDensity()  # density estimator for commission percentages
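
# Sketch (an assumption about later use, not shown in this excerpt; it presumes
# these are pyspark.mllib.stat.KernelDensity objects) of how such an estimator
# is typically driven: feed it an RDD of float observations, set a bandwidth,
# then evaluate it on the grid built above.
def estimate_density(kd, sample_rdd, grid_df, col, bandwidth):
    """Evaluate a KernelDensity estimator at every value of grid_df[col]."""
    kd.setSample(sample_rdd)      # RDD of float observations
    kd.setBandwidth(bandwidth)    # Gaussian kernel bandwidth
    points = [row[col] for row in grid_df.select(col).collect()]
    return kd.estimate(points)    # numpy array of density values

# e.g. nirDensity = estimate_density(kdNIR, interestSample, NIR, 'Interest', 3 * abstol)
# where 'interestSample' would be an RDD of observed interest rates (hypothetical name).
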
from pyspark.sql import HiveContext


def sql_hive_context_example(spark):
    
    # create hive context object.
    hive_ctx = HiveContext(spark.sparkContext)

    # createDataFrame
    l = [('Alice', 18), ('Bob', 20), ('Charley', 22)]
    df = hive_ctx.createDataFrame(l, ('name', 'age'))
    print("createDataFrame API finished")

    # registerDataFrameAsTable 
    hive_ctx.registerDataFrameAsTable(df, "table1")
    print("registerDataFrameAsTable API finished")

    # sql
    tmp_df = hive_ctx.sql("select * from table1")
    tmp_df.show()
    print("sql API finished")

    # table
    tmp_df = hive_ctx.table("table1")
    tmp_df.show()
    print("table API finished")

    # tableNames
    table_names = hive_ctx.tableNames()
    print(table_names)
    print("tableNames API finished")

    # tables
    tables = hive_ctx.tables()
    print(tables)
    print("tables API finished")

    # range
    tmp_df = hive_ctx.range(1, 10, 2)
    tmp_df.show()
    print("range API finished")

    # dropTempTable
    hive_ctx.dropTempTable("table1")
    table_names = hive_ctx.tableNames()
    print(table_names)
    print("dropTempTable API finished")

    # cacheTable & uncacheTable & clearCache
    df = hive_ctx.range(1, 10, 2)
    hive_ctx.registerDataFrameAsTable(df, "table")
    hive_ctx.cacheTable("table")
    hive_ctx.uncacheTable("table")
    hive_ctx.clearCache()
    print("cacheTable & uncacheTable & clearCache API finished")

    # createExternalTable

    # newSession

    # registerFunction
    # Deprecated in 2.3.0. Use :func:`spark.udf.register` instead

    # registerJavaFunction
    # Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead
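
    # Sketch of the Python replacement for the deprecated registerFunction above
    # (assumes 'spark' is a SparkSession, matching its use via spark.sparkContext):
    from pyspark.sql.types import LongType
    spark.udf.register("plus_one", lambda x: x + 1, LongType())
    df.selectExpr("plus_one(id) as id_plus_one").show()
    print("spark.udf.register sketch finished")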

    # setConf & getConf
    hive_ctx.setConf("key1", "value1")
    value = hive_ctx.getConf("key1")
    print(value)
    print("setConf & getConf API finished")

    # refreshTable
    # Exception: An error occurred while calling o26.refreshTable:
    # Method refreshTable([class java.lang.String]) does not exist
    
    print("Finish running HiveContext API")
########################################################################
### Example #3
import time
import datetime
from pyspark.sql.functions import rand

output_path = "s3://emr-rwes-pa-spark-dev-datastore/BI_IPF_2016/02_results/"
start_time = time.time()
st = datetime.datetime.fromtimestamp(start_time).strftime('%Y%m%d_%H%M%S')

table_name = "hive_test_" + st
datafactz_table_name = "hive_test_datafactz_" + st

# Read the pos_file CSV (spark-csv reader, header present, schema inferred)
pos = sqlContext.read.load((data_path + pos_file),
                           format='com.databricks.spark.csv',
                           header='true',
                           inferSchema='true')

# Read the neg_file CSV with the same options
neg = sqlContext.read.load((data_path + neg_file),
                           format='com.databricks.spark.csv',
                           header='true',
                           inferSchema='true')

dataColumns = pos.columns

# Stack the two inputs with a consistent column order
data = pos.select(dataColumns).unionAll(neg.select(dataColumns))

# For IMS: write the combined table as ORC
data.write.save(path=output_path + table_name, format='orc')

# For datafactz: build a table of numRowsReq rows with three random columns
df = sqlContext.range(0, numRowsReq)
datafactz_df = df.select(rand().alias("Col1"),
                         rand().alias("Col2"),
                         rand().alias("Col3"))
datafactz_df.write.save(path=output_path + datafactz_table_name, format='orc')
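
# Quick read-back check on the ORC output written above (a sketch; this
# verification step is an assumption, not part of the original snippet):
written = sqlContext.read.format('orc').load(output_path + table_name)
print("IMS table rows written:", written.count())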