Example #1
 def _get_relation(self):
     # read the relation info from HDFS
     path = self.spark.hdfs_base + 'relation'
     reader = DataFrameReader(self.spark.sqlctx)
     try:
         df = reader.load(path)
     except Exception:
         # the relation data may not exist on HDFS yet: build it, then retry the load
         self.update_relation()
         df = reader.load(path)
     return df
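For context, the helper above relies on DataFrameReader.load picking up the default data source (parquet, unless spark.sql.sources.default says otherwise). A minimal standalone sketch of the same read, assuming a SparkSession named spark and a hypothetical HDFS path:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("relation-demo").getOrCreate()
path = "hdfs:///data/relation"   # hypothetical location
df = spark.read.load(path)       # same call as DataFrameReader(...).load(path) above
df.show()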
Example #2
    def read(self) -> DataFrameReader:
        """
        Returns a :class:`DataFrameReader` that can be used to read data
        in as a :class:`DataFrame`.

        .. versionadded:: 2.0.0

        Returns
        -------
        :class:`DataFrameReader`

        Examples
        --------
        >>> spark.read
        <pyspark.sql.readwriter.DataFrameReader object ...>

        Write a DataFrame into a JSON file and read it back.

        >>> import tempfile
        >>> with tempfile.TemporaryDirectory() as d:
        ...     # Write a DataFrame into a JSON file
        ...     spark.createDataFrame(
        ...         [{"age": 100, "name": "Hyukjin Kwon"}]
        ...     ).write.mode("overwrite").format("json").save(d)
        ...
        ...     # Read the JSON file as a DataFrame.
        ...     spark.read.format('json').load(d).show()
        +---+------------+
        |age|        name|
        +---+------------+
        |100|Hyukjin Kwon|
        +---+------------+
        """
        return DataFrameReader(self)
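The returned reader is normally chained with format/option/load rather than used bare. A small usage sketch under an assumed file path:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = (spark.read
      .format("csv")
      .option("header", "true")        # first line holds column names
      .option("inferSchema", "true")   # derive column types from the data
      .load("/tmp/people.csv"))        # hypothetical path
df.printSchema()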
Example #3
    def read(self):
        """
        Returns a :class:`DataFrameReader` that can be used to read data
        in as a :class:`DataFrame`.

        :return: :class:`DataFrameReader`
        """
        return DataFrameReader(self._wrapped)
Example #4
    def read(self):
        """
        Returns a :class:`DataFrameReader` that can be used to read data
        in as a :class:`DataFrame`.

        >>> sqlContext.read
        <pyspark.sql.readwriter.DataFrameReader object at ...>
        """
        return DataFrameReader(self)
Example #5
    def read(self):
        """
        Returns a :class:`DataFrameReader` that can be used to read data
        in as a :class:`DataFrame`.

        :return: :class:`DataFrameReader`
        """
        return DataFrameReader(self)
Example #6
    def read(self) -> DataFrameReader:
        """
        Returns a :class:`DataFrameReader` that can be used to read data
        in as a :class:`DataFrame`.

        .. versionadded:: 2.0.0

        Returns
        -------
        :class:`DataFrameReader`
        """
        return DataFrameReader(self._wrapped)
Example #7
    def read(self) -> DataFrameReader:
        """
        Returns a :class:`DataFrameReader` that can be used to read data
        in as a :class:`DataFrame`.

        .. versionadded:: 1.4.0

        Returns
        -------
        :class:`DataFrameReader`
        """
        return DataFrameReader(self.sparkSession)
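Examples #3 through #7 are the same accessor at different points in its history: on SQLContext it forwards to the wrapped SparkSession, while on SparkSession it wraps the session itself. A short sketch of the legacy path, with illustrative names:

from pyspark import SparkContext
from pyspark.sql import SQLContext

sc = SparkContext.getOrCreate()
sqlContext = SQLContext(sc)   # legacy entry point, kept for compatibility
reader = sqlContext.read      # DataFrameReader bound to the underlying SparkSession
print(type(reader))           # <class 'pyspark.sql.readwriter.DataFrameReader'>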
Example #8
import json
from pyspark.sql import SparkSession
from pyspark.sql.readwriter import DataFrameReader
from pyspark.streaming.kafka import KafkaUtils  # imports missing from the original snippet

# brokers and topic for the vehicle stream are defined earlier in the
# original script (the snippet starts mid-file)
kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
lines = kvs.map(lambda x: x[1])  # keep only the Kafka message value
car_location = lines.map(lambda veh: eval(veh)['location'])  # eval parses the payload; json.loads would be safer

brokers, topic = "192.168.52.31:6667", "ljd_mac"
kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
lines = kvs.map(lambda x: x[1])
mac_location = lines.map(lambda veh: eval(veh)['location'])


host = '192.168.1.225'
user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}
dtr = DataFrameReader(sqlContext)  # sqlContext is assumed created earlier in the original script
df_ljd_sfz_wp_dict = dtr.jdbc(url=url, table='ljd_sfz_wp_dict', properties=properties)
print('df_ljd_sfz_wp_dict', type(df_ljd_sfz_wp_dict))
df_ljd_sfz_wp_dict.show()
df_ljd_sfz_wp_dict.createOrReplaceTempView("tmp_ljd_sfz_wp_dict")

def process(time, rdd):
    print("========= %s =========" % str(time))
    try:
        spark = SparkSession.builder.config(conf=rdd.context.getConf()).getOrCreate()
        rowRdd = rdd.map(lambda w: json.dumps(w))
        wplocation = spark.read.json(rowRdd)
        print('wplocation', type(wplocation), wplocation.dtypes)
        wplocation.show()
        wplocation.createOrReplaceTempView("tmp_kafka_wp")
        #sql_kafka_wp = spark.sql("SELECT * FROM tmp_kafka_wp")
    except Exception as e:
        # the original snippet breaks off before the except clause; this
        # minimal handler is added so the try block is syntactically complete
        print('process failed:', e)
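The snippet cuts off before process is attached to the streams. A hedged sketch of the usual wiring with foreachRDD; the union of the two location streams and the start/await calls are assumptions:

locations = car_location.union(mac_location)  # assumed: combine both Kafka streams
locations.foreachRDD(process)                 # foreachRDD passes (time, rdd) to process
ssc.start()
ssc.awaitTermination()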
Example #9
####################################
# PySpark connecting to Hive and Oracle; sqlContext and spark below are
# assumed created earlier in the original script
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader

user = '******'
pwd = 'test'
url = 'jdbc:oracle:thin:@192.168.1.225:1521:ORCL'
#host = '192.168.1.225'
#url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}
# Read Oracle data and write it back to Oracle
dtr = DataFrameReader(sqlContext)
sf_car_test = dtr.jdbc(url=url, table='sf_car_test1', properties=properties)
#sf_car_test = spark.read.jdbc(url=url, table='sf_car_test1', properties=properties)
print('sf_car_test', type(sf_car_test))
sf_car_test.show()
dtw = DataFrameWriter(sf_car_test)
dtw.jdbc(url=url, table='sf_car_test2', mode='overwrite', properties=properties)
#dtw.jdbc(url=url, table='sf_car_test2', mode='append', properties=properties)
#sf_car_test.write.jdbc(url=url, table='sf_car_test2', properties=properties)  # append mode
#sf_car_test.write.mode(saveMode="overwrite").jdbc(url=url, table='sf_car_test2', properties=properties)  # overwrite mode


# Write the transformed table back to Oracle
sf_car_test.createOrReplaceTempView("sf_car")
sf_car = spark.sql("SELECT gmsfhm,hphm FROM sf_car ")
print('sf_car', type(sf_car))
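The example ends before the transformed table is written back. A likely continuation, sketched under assumptions (the target table name is hypothetical):

sf_car.write.jdbc(url=url, table='sf_car_test3', mode='overwrite', properties=properties)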
Example #10
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from pyspark.sql import Window
from pyspark.sql.types import StringType, StructType, StructField

# Initialization
from pyspark import SparkConf, SparkContext  # imports missing from the original snippet
conf = SparkConf().setAppName("First_in_car")
sc = SparkContext(conf=conf)
ssc = SQLContext(sc)  # note: despite the name, this is a SQLContext, not a StreamingContext

host = '192.168.1.225'
user = '******'
pwd = 'test'
table = 'sf_car_test'


# Read Oracle data
from pyspark.sql.readwriter import DataFrameWriter, DataFrameReader
# database connection parameters
url = 'jdbc:oracle:thin:@%s:1521:ORCL' % host
properties = {'user': user, 'password': pwd, 'driver': 'oracle.jdbc.driver.OracleDriver'}

# Read historical first-city-entry records from Oracle
dtr = DataFrameReader(ssc)
df_his_car = dtr.jdbc(url=url, table=table, properties=properties)
print('df_his_car', df_his_car)
df_his_car.show()
print(111111111111)  # debug marker

#sc.stop()
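For comparison, a sketch of the modern equivalent of the read above: going through SparkSession.read.jdbc instead of constructing DataFrameReader by hand, reusing the connection variables defined in this example.

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("First_in_car").getOrCreate()
df_his_car = spark.read.jdbc(url=url, table=table, properties=properties)
df_his_car.show()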