Example #1
import time

from pyspark.sql import SparkSession


def process_insert_rdd(rdds):
    # is_not_empty and to_json are helper functions defined elsewhere in this module
    basic_start_time = time.time()
    try:
        event_df = SparkSession(rdds.context).createDataFrame(
            rdds.filter(is_not_empty).map(to_json).map(
                lambda x: x['data']).map(lambda x: x['document']))
        event_df.show()

        # db writer logic goes here; it is application dependent
    except Exception as e:
        print("Error while parsing the event schema")
        print(e)

    time_taken = int(time.time() - basic_start_time)
    print("Time taken for insert rdd is: " + str(time_taken))
Example #2
def process_update_rdd(rdds):
    # Assumes the same imports and helpers as Example #1 (time, SparkSession,
    # is_not_empty, to_json), plus the updateTableSql helper defined elsewhere.
    basic_start_time = time.time()
    try:
        event_df = SparkSession(rdds.context).createDataFrame(
            rdds.filter(is_not_empty).map(to_json).map(
                lambda x: x['data']).map(lambda x: x['document']))
        event_df.show()

        index_column = ['id']
        table_name = ''

        sql = updateTableSql(event_df, name=table_name, index_columns=index_column)
        print(sql)
        # writer update logic to db here
    except Exception as e:
        print("Error while parsing the event schema")
        print(e)

    time_taken = int(time.time() - basic_start_time)
    print("Time taken for update rdd is: " + str(time_taken))