# 3. Save the raw output (currently disabled).
# fileUtil.save_file(outputRDD, outputFilename, "text", "json")

# 4. Reduce RDDs.
reducedRDD = workflow.reduce_rdds(numFramerPartitions, outputRDD)
# Persisted because it is passed to both the type partitioning and the
# framer call below.
reducedRDD.persist()

# CIDOC-CRM classes handed to apply_partition_on_types.
types = [
    {"name": "E39_Actor", "uri": "http://www.cidoc-crm.org/cidoc-crm/E39_Actor"},
    {"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"},
    {"name": "E67_Birth", "uri": "http://www.cidoc-crm.org/cidoc-crm/E67_Birth"},
    {"name": "E69_Death", "uri": "http://www.cidoc-crm.org/cidoc-crm/E69_Death"},
    {"name": "E52_Time-Span", "uri": "http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span"}
]

# JSON-LD frames to apply; each frame name becomes an output sub-directory.
# NOTE(review): the URL spells "npgConsitituents" differently from the frame
# name -- confirm against the remote file name before "fixing" it.
frames = [
    {"name": "npgConstituents", "url": "https://raw.githubusercontent.com/american-art/aac-alignment/master/frames/npgConsitituents.json-ld"}
]

type_to_rdd_json = workflow.apply_partition_on_types(reducedRDD, types)

# 5. Apply framer.
# NOTE(review): the meaning of the trailing positional 10 is not visible
# here -- confirm against workflow.apply_framer.
framer_output = workflow.apply_framer(reducedRDD, type_to_rdd_json, frames, numFramerPartitions, 10)

for frame_name in framer_output:
    # 6. Map function: the mapped RDD is stored back under the same key
    # (no keys are added or removed while iterating).
    framer_output[frame_name] = framer_output[frame_name].mapValues(mapFunc)
    fileUtil.save_file(framer_output[frame_name], outputFilename + "/" + frame_name, 'text', 'json')
    # Single-argument print call: same output as the old print statement,
    # but also valid syntax on Python 3.
    print("%s %s" % ("Save to:", "---" + frame_name))
"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation" }, { "name": "E67_Birth", "uri": "http://www.cidoc-crm.org/cidoc-crm/E67_Birth" }, { "name": "E69_Death", "uri": "http://www.cidoc-crm.org/cidoc-crm/E69_Death" }, { "name": "E52_Time-Span", "uri": "http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span" }] frames = [{ "name": "npgConstituents", "url": "https://raw.githubusercontent.com/american-art/aac-alignment/master/frames/npgConsitituents.json-ld" }] framer_output = workflow.apply_framer(reducedRDD, types, frames, 5, 2) for frame_name in framer_output: outputRDD = workflow.apply_context(framer_output[frame_name], contextUrl) outputRDD_after = outputRDD.mapValues(mapFunc) if not outputRDD_after.isEmpty(): fileUtil.save_file(outputRDD_after, outputFilename + "/" + frame_name, 'text', 'json') print "Save to:", ("---" + frame_name) # workflow.save_rdd_to_es(outputRDD, es_server, es_port, es_index + "/" + frame_name)
"http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1", "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json", data_type="csv", additional_settings={"karma.input.delimiter":","}) #3. Save the output # fileUtil.save_file(outputRDD, outputFilename, "text", "json") reducedRDD = workflow.reduce_rdds(outputRDD) reducedRDD.persist() types = [ {"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"} ] frames = [ {"name": "AutryMakers", "url": "https://raw.githubusercontent.com/american-art/aac-alignment/master/frames/autryMakers.json-ld"} ] context = workflow.read_json_file(contextUrl) framer_output = workflow.apply_framer(reducedRDD, types, frames) for frame_name in framer_output: outputRDD = workflow.apply_context(framer_output[frame_name], context, contextUrl) #apply mapValues function outputRDD_after = outputRDD.mapValues(mapFunc) if not outputRDD_after.isEmpty(): fileUtil.save_file(outputRDD_after, outputFilename + "/" + frame_name, 'text', 'json') print "Save to:", ("---" + frame_name) # workflow.save_rdd_to_es(outputRDD, es_server, es_port, es_index + "/" + frame_name)