Example #1


####################################
# PySpark to Hive / Oracle over JDBC.
# Assumes a running pyspark shell, where `spark` (SparkSession) and
# `sqlContext` are already defined.
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader

user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@192.168.1.225:1521:ORCL'
#host = '192.168.1.225'
#url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}
# Read Oracle data and write it back to Oracle
dtr = DataFrameReader(sqlContext)
sf_car_test = dtr.jdbc(url=url, table='sf_car_test1', properties=properties)
#sf_car_test = spark.read.jdbc(url=url, table='sf_car_test1', properties=properties)
print('sf_car_test',type(sf_car_test))
sf_car_test.show()
dtw = DataFrameWriter(sf_car_test)
dtw.jdbc(url=url, table='sf_car_test2', mode='overwrite', properties=properties)
#dtw.jdbc(url=url, table='sf_car_test2', mode='append', properties=properties)
#sf_car_test.write.jdbc(url=url, table='sf_car_test2', properties=properties)  # append-style write
#sf_car_test.write.mode(saveMode="overwrite").jdbc(url=url, table='sf_car_test2', properties=properties)  # overwrite-style write


# Write the transformed table back to Oracle
sf_car_test.createOrReplaceTempView("sf_car")
sf_car = spark.sql("SELECT gmsfhm,hphm FROM sf_car ")
print('sf_car',type(sf_car))
sf_car.show()
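
The excerpt stops after the SELECT; a minimal sketch of the write-back step the comment above announces (reusing sf_car_test2 as the target table; overwrite mode is an assumption):

# Write the transformed DataFrame back to Oracle (assumption: overwrite;
# switch to mode='append' to preserve existing rows instead).
sf_car.write.jdbc(url=url, table='sf_car_test2', mode='overwrite', properties=properties)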
Example #2
import json
from pyspark.sql import SparkSession
from pyspark.sql.readwriter import DataFrameReader
from pyspark.streaming.kafka import KafkaUtils

# `ssc` (a StreamingContext) and the first Kafka stream `kvs` (the car topic)
# are created above this excerpt, analogously to the mac stream below.
lines = kvs.map(lambda x: x[1])
car_location = lines.map(lambda veh: (eval(veh)['location']))  # eval() parses the dict-literal payload (json.loads would be safer)

brokers, topic = "192.168.52.31:6667", "ljd_mac"
kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
lines = kvs.map(lambda x: x[1])
mac_location = lines.map(lambda veh: (eval(veh)['location']))


host = '192.168.1.225'
user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}
dtr = DataFrameReader(sqlContext)
df_ljd_sfz_wp_dict = dtr.jdbc(url=url, table='ljd_sfz_wp_dict', properties=properties)
print('df_ljd_sfz_wp_dict',type(df_ljd_sfz_wp_dict))
df_ljd_sfz_wp_dict.show()
df_ljd_sfz_wp_dict.createOrReplaceTempView("tmp_ljd_sfz_wp_dict")

def process(time, rdd):
    print("========= %s =========" % str(time))    
    try:
        spark = SparkSession.builder.config(conf=rdd.context.getConf()).getOrCreate()
        rowRdd = rdd.map(lambda w: json.dumps(w))
        wplocation = spark.read.json(rowRdd)
        print('wplocation',type(wplocation),wplocation.dtypes)    
        wplocation.show()
        wplocation.createOrReplaceTempView("tmp_kafka_wp")
        #sql_kafka_wp = spark.sql("SELECT * FROM tmp_kafka_wp")
        #print('sql_kafka_wp',type(sql_kafka_wp))
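The excerpt is truncated inside process(): the try block still needs a handler, and the function is never attached to a stream. A minimal, hypothetical continuation (the join key `mac` and the attachment point are assumptions, not the original author's code):

        # Hypothetical join of the incoming batch with the Oracle dictionary
        # table registered above (the join key `mac` is an assumption).
        joined = spark.sql("SELECT w.*, d.* FROM tmp_kafka_wp w JOIN tmp_ljd_sfz_wp_dict d ON w.mac = d.mac")
        joined.show()
    except Exception as e:
        print('batch failed:', e)

# Hypothetical wiring: parse each Kafka message into a dict, hand every
# micro-batch to process(), then start the streaming context.
records = lines.map(lambda veh: eval(veh))
records.foreachRDD(process)
ssc.start()
ssc.awaitTermination()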
Example #3
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from pyspark.sql import Window
from pyspark.sql.types import StringType, StructType, StructField

# Initialization (note: `ssc` here is a SQLContext, not a StreamingContext)
conf = SparkConf().setAppName("First_in_car")
sc = SparkContext(conf=conf)
ssc = SQLContext(sc)

host = '192.168.1.225'
user = '******'
pwd = 'test'
table = 'sf_car_test'


# Read data from Oracle
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader
# Database connection parameters
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}

# Read the historical first-city-entry records from Oracle
dtr = DataFrameReader(ssc)
df_his_car = dtr.jdbc(url=url, table=table, properties=properties)
print('df_his_car',df_his_car)
df_his_car.show()
print(111111111111)  # debug marker

#sc.stop()
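
The excerpt ends right after the read, so the Window import above is never used. Given the app name First_in_car, a minimal sketch of a per-vehicle "first entry" query (hphm, the plate-number column from Example #1, and pass_time, a hypothetical timestamp column, are assumptions):

from pyspark.sql import Window
from pyspark.sql.functions import row_number, col

# Keep each vehicle's earliest record: rank rows per plate number by time,
# then keep only rank 1. Replace `pass_time` with the real timestamp column.
w = Window.partitionBy('hphm').orderBy(col('pass_time').asc())
df_first = (df_his_car
            .withColumn('rn', row_number().over(w))
            .filter(col('rn') == 1)
            .drop('rn'))
df_first.show()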