import csv

from pyspark.sql import SQLContext


def load_csv(spark, table):
    """
    :param spark: Spark session
    :param table: table object describing the CSV path and its fields
    :return: Spark DataFrame
    """
    reader = csv.reader(open(table.path, "r"), delimiter=table.delimiter)
    # Map each configured field name to a converter: float for numeric
    # fields, None for fields that should stay as strings.
    un_order_header = dict()
    for field_name in table.all_fields:
        if field_name not in table:
            un_order_header[field_name] = None
            continue
        field = table[field_name]
        if field.field_type == 'numeric':
            un_order_header[field_name] = float
        else:
            un_order_header[field_name] = None
    # Read the header row and pick the converter for each column in order.
    header = []
    col_type = []
    for row in reader:
        for r in row:
            if r not in un_order_header:
                raise Exception("column %s not found in configuration" % r)
            header.append(r)
            col_type.append(un_order_header[r])
        break
    col_num = len(header)
    i = 1
    data = list()
    for row in reader:
        if len(row) != col_num:
            raise Exception(
                "data inconsistent with header: line %d, expected %d columns, found %d"
                % (i, col_num, len(row)))
        line = list()
        for r, nm, tp in zip(row, header, col_type):
            if tp is None:
                line.append(r)
            else:
                try:
                    r = r.strip()
                    # Empty numeric cells become NULL instead of failing.
                    if r == '':
                        line.append(None)
                    else:
                        line.append(tp(r))
                except ValueError:
                    raise Exception(
                        "line %d, column %s cannot be converted to float: %s"
                        % (i, nm, r))
        data.append(tuple(line))
        i += 1
    rdd = spark.sparkContext.parallelize(data)
    df = SQLContext(spark.sparkContext).createDataFrame(rdd, header)
    print("%s loaded!" % table.name)
    df.show()
    return df
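# A minimal usage sketch, not part of the repo: load_csv expects a "table"
# object exposing path, delimiter, name, all_fields, membership tests, and
# per-name field lookup with a field_type attribute. The Field and Table
# classes below are hypothetical stand-ins for illustration only.
class Field(object):
    def __init__(self, field_type):
        self.field_type = field_type


class Table(object):
    def __init__(self, name, path, delimiter, fields):
        self.name = name
        self.path = path
        self.delimiter = delimiter
        self.fields = fields            # dict: field name -> Field
        self.all_fields = list(fields)

    def __contains__(self, field_name):
        return field_name in self.fields

    def __getitem__(self, field_name):
        return self.fields[field_name]


# Example: a CSV with one string column and one numeric column.
# table = Table("users", "/tmp/users.csv", ",",
#               {"name": Field("string"), "age": Field("numeric")})
# df = load_csv(spark, table)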
from pyspark import SparkConf, SparkContext


def kmeans_training(master_url):
    sf = SparkConf() \
        .setMaster(master_url) \
        .setAppName("SparkSessionZipsExample") \
        .set("spark.executor.memory", "8g")
    sc = SparkContext(conf=sf)
    data = sc.textFile("hdfs://master32:9000/vectors/word_vector_sh.vec")

    def get_word_vec(line):
        # Each line is "<word> v1 v2 ...": skip the leading word token and
        # parse the rest as floats; short lines fall back to a zero vector.
        x = []
        tokens = line.split(" ")
        if len(tokens) >= 100:
            for i, token in enumerate(tokens):
                if i == 0 or token == "":
                    continue
                x.append(float(token))
        else:
            x = [0.0] * 100
        # Return a plain list: createDataFrame cannot infer a schema from a
        # numpy array, but treats a list as one row of float columns.
        return x

    tmp = data.map(get_word_vec)
    df = SQLContext(sc).createDataFrame(tmp)
    df.show()
    return
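# As written, kmeans_training only parses the vectors into a DataFrame; the
# actual k-means fit is not implemented here. A minimal sketch of that
# missing step, using pyspark.ml.clustering.KMeans and assuming the parsed
# rows are wrapped into a single "features" vector column first (fit_kmeans
# is a hypothetical helper name):
from pyspark.ml.clustering import KMeans
from pyspark.ml.linalg import Vectors


def fit_kmeans(sc, vectors_rdd, k=10):
    # vectors_rdd: RDD of lists of floats, e.g. the output of get_word_vec.
    rows = vectors_rdd.map(lambda v: (Vectors.dense(v),))
    df = SQLContext(sc).createDataFrame(rows, ["features"])
    # KMeans reads the "features" column by default.
    model = KMeans(k=k, seed=1).fit(df)
    return model


# Hypothetical usage with the sc and tmp built inside kmeans_training:
# model = fit_kmeans(sc, tmp, k=10)
# print(model.clusterCenters())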
from pyspark.sql import functions as F


# In[15]:

df.printSchema()


# In[16]:

df.show()


# In[17]:

# date_sub subtracts the given number of days from a date column.
df.withColumn("date_sub_10", F.date_sub("date", 10)).show()


# In[18]:

# date_add adds days; 10 here to match the "date_add_10" column name.
df.withColumn("date_add_10", F.date_add("date", 10)).show()