""" wordcount example using the rdd api, we'll write a test for this """ from __future__ import print_function import sys from pyspark import SparkContext from pyspark import HiveContext def do_json_counts(df, target_name): """ count of records where name=target_name in a dataframe with column 'name' """ return df.filter(df.name == target_name).count() if __name__ == "__main__": if len(sys.argv) != 2: sys.exit("Usage: json file}") sc = SparkContext(appName="PythonJsonCount") hc = HiveContext.getOrCreate(sc) df = hc.read.json(sys.argv[1], 1) print("Name vikas found %d times" % do_json_counts(df, 'vikas'))
from valida_data_ref_carga import verifica_data_ref_carga

# ## Spark context definitions

# In[4]:

# Application name and a local master with two worker threads.
conf = SparkConf().setAppName("Test_movto_validos").setMaster("local[2]")

# In[5]:

# Hive-enabled session built from the configuration above.
spark = SparkSession.builder.enableHiveSupport().config(
    conf=conf).getOrCreate()

# In[6]:

# getOrCreate is documented to take a SparkContext, so hand it the
# session's underlying context rather than the SparkSession object itself.
spark_hive = HiveContext.getOrCreate(spark.sparkContext)

# In[7]:

# remove when deploying
#if sc.startTime != 0: sc.stop()

# In[8]:

# Reuses the context that is already active (created with the session above).
sc = SparkContext.getOrCreate()

# In[9]:

sql_ctx = SQLContext(sc)

# In[10]:
""" wordcount example using the rdd api, we'll write a test for this """ from __future__ import print_function import sys from pyspark import SparkContext from pyspark import HiveContext def do_json_counts(df, target_name): """ count of records where name=target_name in a dataframe with column 'name' """ return df.filter(df.name == target_name).count() if __name__ == "__main__": if len(sys.argv) != 2: sys.exit("Usage: json file}") sc = SparkContext(appName="PythonJsonCount") hc = HiveContext.getOrCreate(sc) df = hc.read.json(sys.argv[1], 1) print("Name vikas found %d times" % do_json_counts(df, 'vikas'))