# Example #1
def convert_to_line(json_list):
    """Serialize each item of *json_list* into newline-delimited JSON (NDJSON).

    Parameters:
        json_list: iterable of JSON-serializable objects, one record each.

    Returns:
        A single string with one JSON document per line, each line
        terminated by "\n" (empty string for empty input).
    """
    # str.join builds the result in one pass instead of the original
    # quadratic `+=` loop; the per-iteration debug print is removed.
    return "".join(json.dumps(record) + "\n" for record in json_list)


def parse_json(json_data, sc):
    """Load a list of JSON-serializable records into a Spark DataFrame.

    Parameters:
        json_data: iterable of JSON-serializable objects (one record each).
        sc: SparkContext used to distribute the serialized records.

    Returns:
        A Spark DataFrame parsed from the newline-delimited JSON records.

    NOTE(review): relies on a module-level ``sqlContext`` having been
    created before this function is called.
    """
    ndjson = convert_to_line(json_data)
    # splitlines() already returns a list; the original copied it
    # element-by-element into a second list for no benefit.
    # 8 partitions, matching the original hard-coded parallelism.
    rdd = sc.parallelize(ndjson.splitlines(), 8)
    return sqlContext.read.json(rdd)


if __name__ == '__main__':
    sprk = Spark_Session()
    conn = sprk.Spark_Context()
    sql_conn = sprk.Spark_Connect()
    sqlContext = SQLContext(conn)

    # Fetch the public GitHub users listing (users with id > 100) and
    # load it into a Spark DataFrame via parse_json.
    # https://api.github.com/users?since=100
    # NOTE(review): the fetch/show statements below had lost their
    # indentation and were running at import time; they belong inside
    # the __main__ guard with the Spark setup they depend on.
    with urllib.request.urlopen("https://api.github.com/users?since=100") as url:
        data = parse_json(parse(url.read().decode("utf-8")), conn)

    data.show()
# Example #2
from pyspark import Row
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from pyspark.sql.context import SQLContext
import pandas as pd
import re
from pyspark.sql.functions import *
from operator import add
from Pycrypto import Spark_Session


def sum_cal(v):
    """Return the sum of the numbers in *v* as a float.

    Parameters:
        v: iterable of numbers.

    Returns:
        float: the total; 0.0 for empty input.
    """
    # Built-in sum with a 0.0 start value preserves the original's
    # float result (including the empty case) and avoids both the
    # manual accumulation loop and shadowing the builtin `sum`.
    return sum(v, 0.0)


if __name__ == '__main__':
    sprk = Spark_Session()
    conn = sprk.Spark_Context()
    # Each line of emp.txt is a CSV row; key on column index 5 and
    # take column index 4 as a float value.
    lines = conn.textFile("/Users/shuvamoymondal/Downloads/emp.txt").map(
        lambda v: v.split(",")).map(lambda g: (g[5], float(g[4])))

    # NOTE(review): these statements had lost their indentation and
    # were running at import time; they depend on `lines` created
    # inside the __main__ guard, so they belong there too.
    print(lines.collect())
    # Maximum of the float field across all rows.
    v = lines.map(lambda h: h[1]).max()
    print(v)