# Script fragment: read a CSV, apply the NPGConstituents Karma model to it,
# then reduce the resulting RDD. `sc` and `argv` must already be defined by
# the part of the script that precedes this fragment.
java_import(sc._jvm, "edu.isi.karma")

# Command-line arguments: input path first, output path second.
inputFilename = argv[1]
outputFilename = argv[2]

fileUtil = FileUtil(sc)
workflow = Workflow(sc)
contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

#1. Read the input
#test big file
# inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1000)
#test small file
inputRDD = workflow.batch_read_csv(inputFilename)

#2. Apply the karma Model
# The context argument is the same URL that contextUrl holds, so the variable
# is passed instead of repeating the literal.
karmaSettings = {"karma.input.delimiter": ","}
outputRDD = workflow.run_karma(
    inputRDD,
    "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl",
    "http://dig.isi.edu/npgConstituents/",
    "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1",
    contextUrl,
    data_type="csv",
    additional_settings=karmaSettings,
)

#3. Save the output
# fileUtil.save_file(outputRDD, outputFilename, "text", "json")
reducedRDD = workflow.reduce_rdds(outputRDD)
# Script fragment: create the Spark context, read a CSV and apply the
# NPGConstituents Karma model with an explicit partition count.
sc = SparkContext(appName="TEST")
java_import(sc._jvm, "edu.isi.karma")

# Command-line arguments: input path first, output path second.
inputFilename = argv[1]
outputFilename = argv[2]

numPartitions = 1000
# Floor division keeps this an integer on Python 3 (and under
# `from __future__ import division`); on Python 2 the value is unchanged.
numFramerPartitions = max(10, numPartitions // 10)

fileUtil = FileUtil(sc)
workflow = Workflow(sc)
contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

#1. Read the input
inputRDD = workflow.batch_read_csv(inputFilename)

#2. Apply the karma Model
# contextUrl holds exactly the URL that was previously repeated inline here.
outputRDD = workflow.run_karma(
    inputRDD,
    "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl",
    "http://americanartcollaborative.org/npg/",
    "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1",
    contextUrl,
    num_partitions=numPartitions,
    data_type="csv",
    additional_settings={"karma.input.delimiter": ","},
)

#3. Save the output
# fileUtil.save_file(outputRDD, outputFilename, "text", "json")

#4. Reduce rdds
# Process one tab-separated job description held in `line`: download the data
# file, apply a Karma model to it and save the result as text.
# `line`, `workflow`, `download_file` and `outputFilename` come from the
# surrounding script, which is not visible in this fragment.
#
# Field order within the line:
#   0 data file URL      1 partition count   2 model file URL   3 base
#   4 root               5 context           6 output folder    7 output zip path
line = line.rstrip()
params = line.split("\t")

data_file_URL = str(params[0])
num_partitions = int(params[1])
model_file_URL = str(params[2])
base = str(params[3])
root = str(params[4])
context = str(params[5])
output_folder = str(params[6])
output_zip_path = str(params[7])

#0. Download data file
dataFileName = download_file(data_file_URL)

#1. Read the input
inputRDD = workflow.batch_read_csv(dataFileName).partitionBy(num_partitions)

#2. Apply the karma Model
outputRDD = workflow.run_karma(
    inputRDD,
    model_file_URL,
    base,
    root,
    context,
    data_type="csv",
    additional_settings={
        "karma.input.delimiter": ",",
        "karma.output.format": "n3",
    },
)

#3. Save the output
# Only the second element of each record is written out.
outputPath = outputFilename + "/" + output_folder
outputRDD.map(lambda x: x[1]).saveAsTextFile(outputPath)

# Parenthesised form prints the same text on Python 2 and is also valid
# syntax on Python 3, unlike the bare print statement.
print("Successfully apply karma!")
# Script fragment: create the Spark context, read a CSV into a single
# partition and apply the AutryMakers Karma model to it.
sc = SparkContext(appName="TEST")
java_import(sc._jvm, "edu.isi.karma")

# Command-line arguments: input path first, output path second.
inputFilename, outputFilename = argv[1], argv[2]

fileUtil = FileUtil(sc)
workflow = Workflow(sc)
contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

#1. Read the input
#test big file
inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1)
#test small file
# inputRDD = workflow.batch_read_csv(inputFilename)

#2. Apply the karma Model
# contextUrl is the same URL the call previously spelled out inline.
delimiterSettings = {"karma.input.delimiter": ","}
outputRDD = workflow.run_karma(
    inputRDD,
    "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl",
    "http://dig.isi.edu/AutryMakers/",
    "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1",
    contextUrl,
    data_type="csv",
    additional_settings=delimiterSettings,
)

#3. Save the output
# Script fragment: start Spark, load the CSV named on the command line,
# collapse it to one partition and run the AutryMakers Karma model over it.
sc = SparkContext(appName="TEST")
java_import(sc._jvm, "edu.isi.karma")

inputFilename = argv[1]   # path of the CSV to read
outputFilename = argv[2]  # output location (not used within this fragment)

fileUtil = FileUtil(sc)
workflow = Workflow(sc)
contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

#1. Read the input
#test big file
csvRDD = workflow.batch_read_csv(inputFilename)
inputRDD = csvRDD.partitionBy(1)
#test small file
# inputRDD = workflow.batch_read_csv(inputFilename)

#2. Apply the karma Model
outputRDD = workflow.run_karma(
    inputRDD,
    "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl",
    "http://dig.isi.edu/AutryMakers/",
    "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1",
    "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
    data_type="csv",
    additional_settings={"karma.input.delimiter": ","},
)

#3. Save the output