Beispiel #1
0
    workflow = Workflow(sc)
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input

    # Variant for a large input file (disabled): repartition explicitly into 1000 partitions
    # inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1000)

    # Variant for a small input file (active): read without explicit repartitioning
    inputRDD = workflow.batch_read_csv(inputFilename)

    #2. Apply the karma Model
    outputRDD = workflow.run_karma(
        inputRDD,
        "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl",
        "http://dig.isi.edu/npgConstituents/",
        "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1",
        "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
        data_type="csv",
        additional_settings={"karma.input.delimiter": ","})

    #3. Save the output
    # fileUtil.save_file(outputRDD, outputFilename, "text", "json")

    reducedRDD = workflow.reduce_rdds(outputRDD)
    reducedRDD.persist()
    types = [{
        "name": "E39_Actor",
        "uri": "http://www.cidoc-crm.org/cidoc-crm/E39_Actor"
    }, {
        "name": "E82_Actor_Appellation",
        "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"
    outputFilename = argv[2]
    numPartitions = 1000
    numFramerPartitions = max(10, numPartitions / 10)

    fileUtil = FileUtil(sc)
    workflow = Workflow(sc)
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input
    inputRDD = workflow.batch_read_csv(inputFilename)

    #2. Apply the karma Model
    outputRDD = workflow.run_karma(inputRDD,
                                   "https://raw.githubusercontent.com/american-art/npg/master/NPGConstituents/NPGConstituents-model.ttl",
                                   "http://americanartcollaborative.org/npg/",
                                   "http://www.cidoc-crm.org/cidoc-crm/E39_Actor1",
                                   "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
                                   num_partitions=numPartitions,
                                   data_type="csv",
                                   additional_settings={"karma.input.delimiter":","})

    #3. Save the output
    # fileUtil.save_file(outputRDD, outputFilename, "text", "json")

    #4. Reduce rdds
    reducedRDD = workflow.reduce_rdds(numFramerPartitions, outputRDD)
    reducedRDD.persist()

    types = [
        {"name": "E39_Actor", "uri": "http://www.cidoc-crm.org/cidoc-crm/E39_Actor"},
        {"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"},
        {"name": "E67_Birth", "uri": "http://www.cidoc-crm.org/cidoc-crm/E67_Birth"},
            root = str(params[4])
            context = str(params[5])
            output_folder = str(params[6])
            output_zip_path = str(params[7])

            #0. Download data file
            dataFileName = download_file(data_file_URL)

            #1. Read the input
            inputRDD = workflow.batch_read_csv(dataFileName).partitionBy(num_partitions)

            #2. Apply the karma Model
            outputRDD = workflow.run_karma(inputRDD,
                                            model_file_URL,
                                            base,
                                            root,
                                            context,
                            data_type="csv",
                            additional_settings={"karma.input.delimiter":",", "karma.output.format": "n3"})

            #3. Save the output
            outputPath = outputFilename + "/" + output_folder
            outputRDD.map(lambda x: x[1]).saveAsTextFile(outputPath)
            print "Successfully apply karma!"

            #4. Concatenate the saved output files into a single .n3 file
            input_sum_file = outputFilename + "/" + output_folder + "/"
            output_sum_file = outputFilename + "/" + output_folder + ".n3"
            concate_file(input_sum_file, output_sum_file)
            print "Successfully generate whole data file!"
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input

    # Variant for a large input file (active): repartition explicitly (here into 1 partition)
    inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1)

    #test small file
    # inputRDD = workflow.batch_read_csv(inputFilename)


    #2. Apply the karma Model
    outputRDD = workflow.run_karma(inputRDD,
                                   "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl",
                                   "http://dig.isi.edu/AutryMakers/",
                                   "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1",
                                   "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
                                   data_type="csv",
                                   additional_settings={"karma.input.delimiter":","})

    #3. Save the output
    # fileUtil.save_file(outputRDD, outputFilename, "text", "json")

    reducedRDD = workflow.reduce_rdds(outputRDD)

    reducedRDD.persist()
    types = [
        {"name": "E82_Actor_Appellation", "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"}
    ]
    frames = [
        {"name": "AutryMakers", "url": "https://raw.githubusercontent.com/american-art/aac-alignment/master/frames/autryMakers.json-ld"}
    workflow = Workflow(sc)
    contextUrl = "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json"

    #1. Read the input

    # Variant for a large input file (active): repartition explicitly (here into 1 partition)
    inputRDD = workflow.batch_read_csv(inputFilename).partitionBy(1)

    #test small file
    # inputRDD = workflow.batch_read_csv(inputFilename)

    #2. Apply the karma Model
    outputRDD = workflow.run_karma(
        inputRDD,
        "https://raw.githubusercontent.com/american-art/autry/master/AutryMakers/AutryMakers-model.ttl",
        "http://dig.isi.edu/AutryMakers/",
        "http://www.cidoc-crm.org/cidoc-crm/E22_Man-Made_Object1",
        "https://raw.githubusercontent.com/american-art/aac-alignment/master/karma-context.json",
        data_type="csv",
        additional_settings={"karma.input.delimiter": ","})

    #3. Save the output
    # fileUtil.save_file(outputRDD, outputFilename, "text", "json")

    reducedRDD = workflow.reduce_rdds(outputRDD)

    reducedRDD.persist()
    types = [{
        "name": "E82_Actor_Appellation",
        "uri": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation"
    }]
    frames = [{