CLOUDSQL_PWD  = sys.argv[4]

BEST_RANK = int(sys.argv[5])
BEST_ITERATION = int(sys.argv[6])
BEST_REGULATION = float(sys.argv[7])

TABLE_ITEMS  = "Accommodation"
TABLE_RATINGS = "Rating"
TABLE_RECOMMENDATIONS = "Recommendation"

# Read the data from Cloud SQL
# Create DataFrames
#[START read_from_sql]
jdbcDriver = 'com.mysql.jdbc.Driver'
jdbcUrl    = 'jdbc:mysql://%s:3306/%s?user=%s&password=%s' % (CLOUDSQL_INSTANCE_IP, CLOUDSQL_NAME, CLOUDSQL_USER, CLOUDSQL_PWD)
dfAccos = sqlContext.load(source='jdbc', driver=jdbcDriver, url=jdbcUrl, dbtable=TABLE_ITEMS)
dfRates = sqlContext.load(source='jdbc', driver=jdbcDriver, url=jdbcUrl, dbtable=TABLE_RATINGS)
#[END read_from_sql]

# Get all the ratings rows of our user
dfUserRatings  = dfRates.filter(dfRates.userId == USER_ID).map(lambda r: r.accoId).collect()
print(dfUserRatings)

# Returns only the accommodations that have not been rated by our user
rddPotential  = dfAccos.rdd.filter(lambda x: x[0] not in dfUserRatings)
pairsPotential = rddPotential.map(lambda x: (USER_ID, x[0]))

#[START split_sets]
rddTraining, rddValidating, rddTesting = dfRates.rdd.randomSplit([6,2,2])
#[END split_sets]
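For orientation, the BEST_RANK, BEST_ITERATION and BEST_REGULATION values read from the command line above are normally passed straight to ALS on the training split, with the validation split used to check the error. A minimal sketch, assuming pyspark.mllib's ALS and the how_far() helper shown in the fragments further down:

from pyspark.mllib.recommendation import ALS

# Train with the pre-tuned hyperparameters (rank, iterations, lambda)
model = ALS.train(rddTraining, BEST_RANK, BEST_ITERATION, BEST_REGULATION)

# Sanity-check the error on the validation split
# error = how_far(model, rddValidating, rddValidating.count())
# print("Validation error: %f" % error)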
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import StructField
from pyspark.sql.types import StringType

conf = SparkConf().setAppName("app_collaborative")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

jdbcDriver = 'com.mysql.jdbc.Driver'
jdbcUrl = 'jdbc:mysql://173.194.227.120:3306/recoom?user=root'

USER_ID = 0

# Read the data from Cloud SQL
# Create DataFrames
dfAccos = sqlContext.load(source='jdbc',
                          driver=jdbcDriver,
                          url=jdbcUrl,
                          dbtable='AccommodationT')
dfRates = sqlContext.load(source='jdbc',
                          driver=jdbcDriver,
                          url=jdbcUrl,
                          dbtable='RatingT')

# Get all the ratings rows of our user
dfUserRatings = dfRates.filter(
    dfRates.userId == USER_ID).map(lambda r: r.accoId).collect()
print(dfUserRatings)

# Returns only the accommodations that have not been rated by our user
rddPotential = dfAccos.rdd.filter(lambda x: x[0] not in dfUserRatings)
pairsPotential = rddPotential.map(lambda x: (USER_ID, x[0]))
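This fragment stops at pairsPotential; a natural follow-up is to train a model on the ratings and score those unrated accommodations. A minimal sketch, assuming the Rating table has exactly (userId, accoId, rating) columns and using placeholder hyperparameters that mirror the ALS.train call further down:

from pyspark.mllib.recommendation import ALS

# Train on all known ratings (placeholder rank/iterations/lambda)
model = ALS.train(dfRates.rdd, 20, 20, 0.1)

# Predict a rating for every (USER_ID, accoId) pair the user has not rated
predictions = model.predictAll(pairsPotential)

# Keep the 5 accommodations with the highest predicted rating
print(predictions.takeOrdered(5, key=lambda p: -p[2]))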
Example #3
    # Returns the pairs (prediction, rating)
    predictionsAndRatings = predictions.join(againstWiRatings).values()

    # Returns the root mean squared error (RMSE) of the predictions
    return sqrt(
        predictionsAndRatings.map(lambda s: (s[0] - s[1])**2).reduce(add) /
        float(sizeAgainst))


#[END how_far]

# Read the data from Cloud SQL
# Create DataFrames
dfRates = sqlContext.load(source='jdbc',
                          driver=jdbcDriver,
                          url=jdbcUrl,
                          dbtable='Rating')

rddUserRatings = dfRates.filter(dfRates.userId == 0).rdd
print(rddUserRatings.count())

# Split the data into 3 different sets: training, validating, testing
# 60% 20% 20%
rddRates = dfRates.rdd
rddTraining, rddValidating, rddTesting = rddRates.randomSplit([6, 2, 2])

# Add the user's ratings to the training set (union returns a new RDD)
rddTraining = rddTraining.union(rddUserRatings)
nbValidating = rddValidating.count()
nbTesting = rddTesting.count()

from math import sqrt
from operator import add

#[START how_far]
# Measures how far the model's predictions are from the known ratings.
def how_far(model, against, sizeAgainst):
  # Drop the ratings to keep only the (user, product) pairs to predict
  againstNoRatings = against.map(lambda x: (int(x[0]), int(x[1])))

  # Keep the known rating, keyed by (user, product), for later comparison
  againstWiRatings = against.map(lambda x: ((int(x[0]),int(x[1])), int(x[2])) )

  # Make a prediction and map it for later comparison
  # The map has to be ((user,product), rating) not ((product,user), rating)
  predictions = model.predictAll(againstNoRatings).map(lambda p: ( (p[0],p[1]), p[2]) )

  # Returns the pairs (prediction, rating)
  predictionsAndRatings = predictions.join(againstWiRatings).values()

  # Returns the root mean squared error (RMSE) of the predictions
  return sqrt(predictionsAndRatings.map(lambda s: (s[0] - s[1]) ** 2).reduce(add) / float(sizeAgainst))
#[END how_far]

# Read the data from Cloud SQL
# Create DataFrames
dfRates = sqlContext.load(source='jdbc', driver=jdbcDriver, url=jdbcUrl, dbtable='Rating')

rddUserRatings = dfRates.filter(dfRates.userId == 0).rdd
print(rddUserRatings.count())

# Split the data into 3 different sets: training, validating, testing
# 60% 20% 20%
rddRates = dfRates.rdd
rddTraining, rddValidating, rddTesting = rddRates.randomSplit([6,2,2])

# Add the user's ratings to the training set (union returns a new RDD)
rddTraining = rddTraining.union(rddUserRatings)
nbValidating = rddValidating.count()
nbTesting    = rddTesting.count()

print("Training: %d, validation: %d, test: %d" % (rddTraining.count(), nbValidating, rddTesting.count()))
Example #5
BEST_RANK = int(sys.argv[5])
BEST_ITERATION = int(sys.argv[6])
BEST_REGULATION = float(sys.argv[7])

TABLE_ITEMS = "Accommodation"
TABLE_RATINGS = "Rating"
TABLE_RECOMMENDATIONS = "Recommendation"

# Read the data from Cloud SQL
# Create DataFrames
#[START read_from_sql]
jdbcDriver = 'com.mysql.jdbc.Driver'
jdbcUrl = 'jdbc:mysql://%s:3306/%s?user=%s&password=%s' % (
    CLOUDSQL_INSTANCE_IP, CLOUDSQL_NAME, CLOUDSQL_USER, CLOUDSQL_PWD)
dfAccos = sqlContext.load(source='jdbc',
                          driver=jdbcDriver,
                          url=jdbcUrl,
                          dbtable=TABLE_ITEMS)
dfRates = sqlContext.load(source='jdbc',
                          driver=jdbcDriver,
                          url=jdbcUrl,
                          dbtable=TABLE_RATINGS)
#[END read_from_sql]

# Get all the ratings rows of our user
dfUserRatings = dfRates.filter(
    dfRates.userId == USER_ID).map(lambda r: r.accoId).collect()
print(dfUserRatings)

# Returns only the accommodations that have not been rated by our user
rddPotential = dfAccos.rdd.filter(lambda x: x[0] not in dfUserRatings)
pairsPotential = rddPotential.map(lambda x: (USER_ID, x[0]))
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS
from pyspark.sql import SQLContext
from pyspark.sql.types import StructType
from pyspark.sql.types import StructField
from pyspark.sql.types import StringType

conf = SparkConf().setAppName("app_collaborative")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

jdbcDriver = 'com.mysql.jdbc.Driver'
jdbcUrl    = 'jdbc:mysql://173.194.227.120:3306/recoom?user=root'

USER_ID = 0

# Read the data from Cloud SQL
# Create DataFrames
dfAccos = sqlContext.load(source='jdbc', driver=jdbcDriver, url=jdbcUrl, dbtable='AccommodationT')
dfRates = sqlContext.load(source='jdbc', driver=jdbcDriver, url=jdbcUrl, dbtable='RatingT')

# Get all the ratings rows of our user
dfUserRatings  = dfRates.filter(dfRates.userId == USER_ID).map(lambda r: r.accoId).collect()
print(dfUserRatings)

# Returns only the accommodations that have not been rated by our user
rddPotential  = dfAccos.rdd.filter(lambda x: x[0] not in dfUserRatings)
pairsPotential = rddPotential.map(lambda x: (USER_ID, x[0]))


rddTraining, rddValidating, rddTesting = dfRates.rdd.randomSplit([6,2,2])
model = ALS.train(rddTraining, 20, 20, 0.1)
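With a trained model and the pairsPotential pairs built above, the remaining step is usually to score the unrated accommodations and write the best ones back to Cloud SQL. A minimal sketch; the Recommendation table name is an assumption here, and df.write.jdbc requires Spark 1.4+:

from pyspark.sql.types import IntegerType, FloatType

# Score every accommodation the user has not rated yet
predictions = model.predictAll(pairsPotential)

# Keep the 5 best predictions as plain (userId, accoId, prediction) tuples
topPredictions = predictions.map(lambda p: (p[0], p[1], float(p[2]))).takeOrdered(5, key=lambda r: -r[2])
print(topPredictions)

schema = StructType([StructField("userId", IntegerType(), True),
                     StructField("accoId", IntegerType(), True),
                     StructField("prediction", FloatType(), True)])

# Save the recommendations back to Cloud SQL (assumed table name)
dfToSave = sqlContext.createDataFrame(topPredictions, schema)
dfToSave.write.jdbc(url=jdbcUrl, table='Recommendation', mode='overwrite')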

"""
        filtered_list = [list(x) for x in subset_list_filtered]
        for subset in filtered_list:
            for rule in association_rules:
                r_0 = sorted(list(rule[0]))
                r_1 = sorted(list(rule[1]))
                if subset == r_0 and len(subset) > 1 and (
                        'কথা' in subset) and len(r_1) == 1 and rule[2] > 90:
                    print(r_0, '>', r_1, rule[2])
        print('-----------------------------')


# starting point of the program

if __name__ == '__main__':
    # name of the file to read
    input_file_name = sqlContext.load(Rdd1)
    test_file_name = sqlContext.load(Rdd1)
    # minimum support threshold
    minimum_support_threshold = 3
    # minimum confidence threshold
    minimum_confidence_threshold = 90
    # creating the Apriori object
    apriori = Apriori()
    # reading data from the file
    apriori.read_file(input_file_name)
    # executing the apriori algorithm
    print(
        '##########################################################################################'
    )
    print('Training Phase')
    print(
Example #8
from pyspark import sql
from pyspark.sql import SQLContext

sqc = SQLContext(sc)

# The idea is to read the CSV directly into a Spark DataFrame

#defining the schema
#msisdn,SongUniqueCode,Duration,Circle,DATE,DNIS,MODE,businesscategory
#9037991838,Hun-14-63767,202,Kolkata,10/1/2014,59090,,HindiTop20

mySchema=sql.types.StructType([
                        sql.types.StructField("msisdn",sql.types.StringType(),False),
                        sql.types.StructField("songid",sql.types.StringType(),False),
                        sql.types.StructField("duration",sql.types.IntegerType(),True),
                        sql.types.StructField("Circle",sql.types.StringType(),True),
                        sql.types.StructField("date",sql.types.StringType(),True),
                        sql.types.StructField("dnis",sql.types.StringType(),True),   # DNIS (empty in the sample row above); included so the field count matches the 8-column header
                        sql.types.StructField("mode",sql.types.StringType(),True),
                        sql.types.StructField("businesscategory",sql.types.StringType(),True)
                        ])

transdf=sqc.load(source="com.databricks.spark.csv",path ="file:///home/loq/sunil/spark/content_data.csv",schema=mySchema)

transdf.take(2)

# Alternative: reading via textFile and mapping the rows manually (kept commented out)
'''
transrdd=sc.textFile("file:///home/loq/sunil/spark/content_data.csv").\
            map(lambda x: x.split(',')).\
            map(lambda y: sql.Row(msisdn=y[0],songid=y[1],duration=y[2],circle=y[3],businesscategory=y[7]))

print(transrdd.take(2))
'''
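Once loaded with the explicit schema, the DataFrame can be queried directly; a small illustrative follow-up using the column names defined in mySchema above:

# Listen events per business category
transdf.groupBy("businesscategory").count().show()

# Average listen duration per circle
transdf.groupBy("Circle").avg("duration").show()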