Example #1
0
from pyspark import SparkContext
from sparklingpandas.pcontext import PSparkContext

# Column labels passed to read_csv as ``names`` for the iris CSV file.
COL_NAMES = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]

# Start a Spark context against the "local" master, named after this example.
sc = SparkContext(master="local", appName="example iris data")

# Wrap the Spark context in a sparklingpandas context.
psc = PSparkContext(sc)

# Load the iris CSV through the sparklingpandas context, using the labels
# above as column names.
iris_df = psc.read_csv("ex-data/iris.csv", names=COL_NAMES)

# Group the rows by the value of the 'class' column.
iris_classes = iris_df.groupby('class')
Example #2
0
from pyspark import SparkContext
from sparklingpandas.pcontext import PSparkContext

# Create spark context.
sc = SparkContext("local", "example sfpd data")

try:
    # Create sparklingpandas context.
    psc = PSparkContext(sc)

    # Read the incidents csv file with the sparkling pandas context.
    spDF = psc.read_csv("ex-data/SFPD_Incidents.csv")

    # Group by the "Category" column, count the entries in each group,
    # collect the result and write it out as a csv file.
    # NOTE: the output path is absolute and machine-specific; change it
    # before running this example on another machine.
    spDF.groupby("Category").count().collect().to_csv(
        path_or_buf="/home/juliet/src/sparklingpandas-ex/my_csv.csv")
finally:
    # Shut the Spark context down even when reading, aggregating or writing
    # fails, so the context does not stay alive after the example is done.
    sc.stop()