Ejemplo n.º 1
0
def pyspark_api():
    """Read a SQL-dump text file with Spark and count lines containing 'insert'.

    Side effects: prints the first line and total line count, then the
    count of lines whose value contains the substring "insert".
    """
    text_file_path = "/home/holyzing/Desktop/marvin-prod-20201125.db"
    # Renamed from `sc`: this is a SparkConf, not a SparkContext.
    conf = SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("PysaprkApi")
    # BUG FIX: SparkSession.builder is a property, not a callable —
    # `SparkSession.builder(sc)` raises TypeError. Pass the SparkConf
    # through .config(conf=...) instead.
    spark = SparkSession.builder.config(conf=conf).getOrCreate()
    text_file = spark.read.text(text_file_path)
    print(text_file.first(), text_file.count())
    line_with_insert = text_file.filter(text_file.value.contains("insert"))
    print(line_with_insert.count())
Ejemplo n.º 2
0
 def main(self):
     """Build a Hive-enabled SparkSession, set a checkpoint dir, and run self.graph.

     Delegates the actual work to ``self.graph(spark)``; this method only
     wires up the session.
     """
     # BUG FIX: SparkSession.builder is a property, not a method —
     # calling it as `builder()` raises TypeError. Access it as an attribute.
     spark = SparkSession.builder.appName("testpy").enableHiveSupport().getOrCreate()
     spark.sparkContext.setCheckpointDir("/tmp/checkpoints")
     self.graph(spark)
Ejemplo n.º 3
0
from pyspark.sql import SparkSession
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

conf = SparkConf().setAppName("building a warehouse")
sc = SparkContext(conf=conf)
sqlCtx = SQLContext(sc)

if __name__ == '__main__':
    logFile = "YOUR_SPARK_HOME/README.md"  # Should be some file on your system
    # BUG FIX: `appName` and `master` were undefined names (NameError) and
    # SparkSession.builder is a property, not a callable. Define the two
    # values explicitly (reusing the app name configured above) and drop
    # the erroneous call parentheses on `builder`.
    appName = "building a warehouse"
    master = "local[*]"
    spark = SparkSession.builder.appName(appName).master(
        master).getOrCreate()
    logData = spark.read.text(logFile).cache()
    # Count lines containing the letters 'a' and 'b', Spark-quickstart style.
    numAs = logData.filter(logData.value.contains('a')).count()
    numBs = logData.filter(logData.value.contains('b')).count()
    print("Lines with a: %i, lines with b: %i" % (numAs, numBs))
    spark.stop()
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
# from pyspark.conf import SparkConf
# from pyspark import SparkContext
from pyspark.sql.functions import input_file_name

#conf = SparkConf().setAppName('hello').setMaster('spark://SC-PC.localdomain:7077')
#sc = SparkContext(conf=conf)
#sc = SparkContext("local", "test")
# BUG FIX: SparkSession.builder is a property, not a callable — calling
# `builder()` raises TypeError.
spark = SparkSession.builder.master(
    "local").enableHiveSupport().getOrCreate()
# BUG FIX: SQLContext's constructor expects a SparkContext, not a
# SparkSession; passing the session breaks subsequent `sql.read` calls.
sql = SQLContext(spark.sparkContext)

# List the source file of every row in the parquet dataset.
df = sql.read.parquet("hdfs://localhost:9000/test/database/")
names = df.select(input_file_name())
names.show()
d = input("WAIT")  # pause so the Spark UI stays up for inspection
#names.repartition(1).write.option("header", "true").csv("filename1.csv")

# Read a table over JDBC; credentials are redacted placeholders.
jdbcDF2 = spark.read.jdbc("jdbc:postgresql:dbserver",
                          "schema.tablename",
                          properties={
                              "user": "******",
                              "password": "******"
                          })