# -----------------------------------------------------------------------------
# PySpark <-> Oracle JDBC round-trip demo.
# Reads table sf_car_test1 from Oracle, writes it back as sf_car_test2
# (overwrite), then runs a Spark SQL projection over a temp view.
#
# Fix: use the public DataFrame reader/writer API (sqlContext.read.jdbc /
# DataFrame.write.jdbc) instead of instantiating the internal
# DataFrameReader / DataFrameWriter classes directly.
# -----------------------------------------------------------------------------
# Retained: DataFrameReader/DataFrameWriter may still be referenced later in
# the file.
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader

# Oracle JDBC connection parameters.
# NOTE(review): hard-coded credentials — move to configuration/secret storage.
user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@192.168.1.225:1521:ORCL'
properties = {'user': user, 'password': pwd,
              'driver': 'oracle.jdbc.driver.OracleDriver'}

# Read an Oracle table into a DataFrame.
# (Assumes `sqlContext` is provided by the surrounding session, e.g. a
# pyspark shell — confirm in the full file.)
sf_car_test = sqlContext.read.jdbc(url=url, table='sf_car_test1',
                                   properties=properties)
print('sf_car_test', type(sf_car_test))
sf_car_test.show()

# Write the DataFrame back to Oracle, replacing any existing table.
# Use mode='append' instead to add rows without dropping the table.
sf_car_test.write.jdbc(url=url, table='sf_car_test2', mode='overwrite',
                       properties=properties)

# Run a Spark SQL projection through a temp view and show the result.
# (Assumes `spark` (SparkSession) exists in the session — confirm; the rest
# of this script uses `sqlContext`.)
sf_car_test.createOrReplaceTempView("sf_car")
sf_car = spark.sql("SELECT gmsfhm,hphm FROM sf_car ")
print('sf_car', type(sf_car))
sf_car.show()
# --- Kafka direct stream + Oracle dictionary lookup (script fragment) ---
# NOTE(review): this chunk starts mid-script — `kvs`, `ssc`, `sqlContext`,
# `KafkaUtils`, `DataFrameReader`, `json`, and `SparkSession` must be
# defined earlier in the full file; confirm against the complete source.
lines = kvs.map(lambda x: x[1])
# NOTE(review): eval() on raw stream payloads executes untrusted input —
# json.loads would be the safe equivalent; flagged, not changed here.
car_location = lines.map(lambda veh: (eval(veh)['location']))

# Second direct stream: MAC-address location events from the ljd_mac topic.
brokers, topic = "192.168.52.31:6667", "ljd_mac"
kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
lines = kvs.map(lambda x: x[1])
mac_location = lines.map(lambda veh: (eval(veh)['location']))

# Oracle JDBC connection parameters.
# NOTE(review): hard-coded credentials — should come from config/secrets.
host = '192.168.1.225'
user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}

# Load the ljd_sfz_wp_dict dictionary table from Oracle and register it as a
# temp view so later SQL can join against it.
dtr = DataFrameReader(sqlContext)
df_ljd_sfz_wp_dict = dtr.jdbc(url=url, table='ljd_sfz_wp_dict', properties=properties)
print('df_ljd_sfz_wp_dict', type(df_ljd_sfz_wp_dict))
df_ljd_sfz_wp_dict.show()
df_ljd_sfz_wp_dict.createOrReplaceTempView("tmp_ljd_sfz_wp_dict")


def process(time, rdd):
    # Per-batch handler for a DStream: converts the batch RDD to JSON strings,
    # reads them into a DataFrame, and registers a temp view for SQL.
    print("========= %s =========" % str(time))
    try:
        # Reuse (or create) the session bound to this RDD's SparkConf.
        spark = SparkSession.builder.config(conf=rdd.context.getConf()).getOrCreate()
        rowRdd = rdd.map(lambda w: json.dumps(w))
        wplocation = spark.read.json(rowRdd)
        print('wplocation', type(wplocation), wplocation.dtypes)
        wplocation.show()
        wplocation.createOrReplaceTempView("tmp_kafka_wp")
        #sql_kafka_wp = spark.sql("SELECT * FROM tmp_kafka_wp")
        #print('sql_kafka_wp',type(sql_kafka_wp))
        # NOTE(review): the function is truncated here in this chunk — the
        # `except` clause for this `try` lies beyond the visible source.
# -----------------------------------------------------------------------------
# "First_in_car" bootstrap: create the Spark context and load the historical
# first-entry-into-city records from Oracle.
#
# Fixes:
#   * `SparkConf` / `SparkContext` were used without being imported — the
#     script would raise NameError at runtime; import them explicitly.
#   * Read through the public `SQLContext.read.jdbc` API instead of
#     instantiating the internal DataFrameReader class.
# -----------------------------------------------------------------------------
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import Window
from pyspark.sql.functions import *
from pyspark.sql.types import StringType, StructType, StructField
# Retained: may be referenced elsewhere in the file.
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader

# Initialise Spark.
conf = SparkConf().setAppName("First_in_car")
sc = SparkContext(conf=conf)
# NOTE(review): `ssc` conventionally names a StreamingContext; this is an
# SQLContext — kept as-is because later code in the file may reference it.
ssc = SQLContext(sc)

# Oracle JDBC connection parameters.
# NOTE(review): hard-coded credentials — move to config/secret storage.
host = '192.168.1.225'
user = '******'
pwd = 'test'
table = 'sf_car_test'
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd,
              'driver': 'oracle.jdbc.driver.OracleDriver'}

# Load the historical first-entry records from Oracle and preview them.
df_his_car = ssc.read.jdbc(url=url, table=table, properties=properties)
print('df_his_car', df_his_car)
df_his_car.show()
print(111111111111)
#sc.stop()