def _get_relation(self):
    # Read the relation data from HDFS
    path = self.spark.hdfs_base + 'relation'
    reader = DataFrameReader(self.spark.sqlctx)
    try:
        df = reader.load(path)
    except Exception:
        # If the path is missing or stale, rebuild the relation data and retry once.
        self.update_relation()
        df = reader.load(path)
    return df
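# Usage sketch (not part of the snippet above): with no explicit format,
# `reader.load(path)` falls back to Spark's default data source -- parquet,
# unless `spark.sql.sources.default` says otherwise. A minimal round trip
# with an explicit format; the local path here is hypothetical.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("reader-demo").getOrCreate()
spark.createDataFrame([(1, "a")], ["id", "val"]) \
    .write.mode("overwrite").parquet("/tmp/relation_demo")
df = spark.read.format("parquet").load("/tmp/relation_demo")
df.show()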
def read(self) -> DataFrameReader:
    """
    Returns a :class:`DataFrameReader` that can be used to read data
    in as a :class:`DataFrame`.

    .. versionadded:: 2.0.0

    Returns
    -------
    :class:`DataFrameReader`

    Examples
    --------
    >>> spark.read
    <pyspark.sql.readwriter.DataFrameReader object ...>

    Write a DataFrame into a JSON file and read it back.

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as d:
    ...     # Write a DataFrame into a JSON file
    ...     spark.createDataFrame(
    ...         [{"age": 100, "name": "Hyukjin Kwon"}]
    ...     ).write.mode("overwrite").format("json").save(d)
    ...
    ...     # Read the JSON file as a DataFrame.
    ...     spark.read.format('json').load(d).show()
    +---+------------+
    |age|        name|
    +---+------------+
    |100|Hyukjin Kwon|
    +---+------------+
    """
    return DataFrameReader(self)
def read(self):
    """
    Returns a :class:`DataFrameReader` that can be used to read data
    in as a :class:`DataFrame`.

    :return: :class:`DataFrameReader`
    """
    return DataFrameReader(self._wrapped)
def read(self):
    """
    Returns a :class:`DataFrameReader` that can be used to read data
    in as a :class:`DataFrame`.

    >>> sqlContext.read
    <pyspark.sql.readwriter.DataFrameReader object at ...>
    """
    return DataFrameReader(self)
def read(self):
    """
    Returns a :class:`DataFrameReader` that can be used to read data
    in as a :class:`DataFrame`.

    :return: :class:`DataFrameReader`
    """
    return DataFrameReader(self)
def read(self) -> DataFrameReader:
    """
    Returns a :class:`DataFrameReader` that can be used to read data
    in as a :class:`DataFrame`.

    .. versionadded:: 2.0.0

    Returns
    -------
    :class:`DataFrameReader`
    """
    return DataFrameReader(self._wrapped)
def read(self) -> DataFrameReader:
    """
    Returns a :class:`DataFrameReader` that can be used to read data
    in as a :class:`DataFrame`.

    .. versionadded:: 1.4.0

    Returns
    -------
    :class:`DataFrameReader`
    """
    return DataFrameReader(self.sparkSession)
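# The `read` variants above are the same property across Spark versions;
# user code reaches it as `spark.read` (or `sqlContext.read` on 1.x). A
# minimal round trip, assuming a local Spark session:
import tempfile
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("read-demo").getOrCreate()
with tempfile.TemporaryDirectory() as d:
    spark.createDataFrame([(100, "Hyukjin Kwon")], ["age", "name"]) \
        .write.mode("overwrite").json(d)
    spark.read.json(d).show()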
kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
lines = kvs.map(lambda x: x[1])
# Note: eval() on raw Kafka payloads is unsafe; json.loads is the safer parser.
car_location = lines.map(lambda veh: (eval(veh)['location']))

brokers, topic = "192.168.52.31:6667", "ljd_mac"
kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
lines = kvs.map(lambda x: x[1])
mac_location = lines.map(lambda veh: (eval(veh)['location']))

# Oracle connection parameters
host = '192.168.1.225'
user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}

# Load the dictionary table from Oracle and register it as a temp view
dtr = DataFrameReader(sqlContext)
df_ljd_sfz_wp_dict = dtr.jdbc(url=url, table='ljd_sfz_wp_dict', properties=properties)
print('df_ljd_sfz_wp_dict', type(df_ljd_sfz_wp_dict))
df_ljd_sfz_wp_dict.show()
df_ljd_sfz_wp_dict.createOrReplaceTempView("tmp_ljd_sfz_wp_dict")

def process(time, rdd):
    print("========= %s =========" % str(time))
    try:
        spark = SparkSession.builder.config(conf=rdd.context.getConf()).getOrCreate()
        rowRdd = rdd.map(lambda w: json.dumps(w))
        wplocation = spark.read.json(rowRdd)
        print('wplocation', type(wplocation), wplocation.dtypes)
        wplocation.show()
        wplocation.createOrReplaceTempView("tmp_kafka_wp")
        #sql_kafka_wp = spark.sql("SELECT * FROM tmp_kafka_wp")
    except Exception as e:
        # The original snippet breaks off inside the try block; a minimal
        # handler is added so the function parses.
        print("batch failed:", e)
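# Wiring sketch (an assumption; the original snippet is cut off): a function
# with the signature `process(time, rdd)` is the standard `foreachRDD`
# callback of the DStream API, applied to each micro-batch before the
# streaming context is started.
car_location.foreachRDD(process)
ssc.start()
ssc.awaitTermination()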
####################################
# PySpark with Hive and Oracle
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader

user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@192.168.1.225:1521:ORCL'
#host = '192.168.1.225'
#url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}

# Read an Oracle table and write the data back to Oracle
dtr = DataFrameReader(sqlContext)
sf_car_test = dtr.jdbc(url=url, table='sf_car_test1', properties=properties)
#sf_car_test = spark.read.jdbc(url=url, table='sf_car_test1', properties=properties)
print('sf_car_test', type(sf_car_test))
sf_car_test.show()

dtw = DataFrameWriter(sf_car_test)
dtw.jdbc(url=url, table='sf_car_test2', mode='overwrite', properties=properties)
#dtw.jdbc(url=url, table='sf_car_test2', mode='append', properties=properties)
#sf_car_test.write.jdbc(url=url, table='sf_car_test2', properties=properties)  # append-mode write (mode actually defaults to 'error')
#sf_car_test.write.mode(saveMode="overwrite").jdbc(url=url, table='sf_car_test2', properties=properties)  # overwrite-mode write

# Write a transformed table back to Oracle
sf_car_test.createOrReplaceTempView("sf_car")
sf_car = spark.sql("SELECT gmsfhm,hphm FROM sf_car")
print('sf_car', type(sf_car))
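# Mode semantics for the commented-out variants above (a summary of the
# standard JDBC writer behavior): 'overwrite' replaces the target table,
# 'append' inserts into it, and the default ('error') fails if the table
# already exists. The fluent form below is equivalent to building a
# DataFrameWriter by hand; 'sf_car_test3' is a hypothetical target table,
# and url/properties are reused from the snippet above.
sf_car.write.mode("overwrite").jdbc(url=url, table='sf_car_test3', properties=properties)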
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from pyspark.sql import Window
from pyspark.sql.types import StringType, StructType, StructField

# Initialization
conf = SparkConf().setAppName("First_in_car")
sc = SparkContext(conf=conf)
ssc = SQLContext(sc)

host = '192.168.1.225'
user = '******'
pwd = 'test'
table = 'sf_car_test'

# Read data from Oracle
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader

# Database connection parameters
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}

# Read the historical first-city-entry records from Oracle
dtr = DataFrameReader(ssc)
df_his_car = dtr.jdbc(url=url, table=table, properties=properties)
print('df_his_car', df_his_car)
df_his_car.show()
print(111111111111)
#sc.stop()
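# The same read via the modern entry point (a sketch, assuming Spark 2.x+,
# where SQLContext is kept only for backward compatibility). The Oracle JDBC
# driver jar must be on the classpath, e.g. submitted with --jars; url,
# table, and properties are reused from the snippet above.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("First_in_car").getOrCreate()
df_his_car = spark.read.jdbc(url=url, table=table, properties=properties)
df_his_car.show()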