Python FileUtil.load_json_file 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: fileUtil

클래스/타입: FileUtil

메소드/함수: load_json_file

hotexamples.com에서의 예제들: 6

Python FileUtil.load_json_file - 6개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 fileUtil.FileUtil.load_json_file에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

load_json_file(6)

save_json_file(3)

FileUtil(1)

checkFile(1)

get_all_file(1)

is_exists(1)

mkDir(1)

spit_filename(1)

writeFile(1)

예제 #1

파일 보기

파일: cleaner.py 프로젝트: dingyi567/dig-entity-merger

    usage = "usage: %prog [options] inputDataset inputDatasetFormat inputPath" \
            "baseDataset baseDatasetFormat" \
            "outputFilename outoutFileFormat"
    parser = OptionParser()
    parser.add_option("-r", "--separator", dest="separator", type="string",
                      help="field separator", default="\t")
    parser.add_option("-p", "--numPartitions", dest="numPartitions", type="int",
                      help="number of partitions", default=10)

    (c_options, args) = parser.parse_args()
    inputFilename1 = args[0]
    inputFileFormat1 = args[1]
    inputPath = args[2]

    baseFilename = args[3]
    baseFormat = args[4]

    outputFilename = args[5]
    outputFileFormat = args[6]
    print "Got options:", c_options, ", " \
                         "input:", inputFilename1, ",", inputFileFormat1, ",", inputPath, \
                         ", base:", baseFilename, ",", baseFormat
    print "Write output to:", outputFilename
    fileUtil = FileUtil(sc)
    input_rdd1 = fileUtil.load_json_file(inputFilename1, inputFileFormat1, c_options).partitionBy(c_options.numPartitions)
    base_rdd = fileUtil.load_json_file(baseFilename, baseFormat, c_options)

    result_rdd = EntityCleaner.clean_rdds(input_rdd1, inputPath, base_rdd, c_options.numPartitions)

    fileUtil.save_json_file(result_rdd, outputFilename, outputFileFormat, c_options)

예제 #2

파일 보기

파일: framer.py 프로젝트: dingyi567/dig-entity-merger

# then pass in result to merge-rdds along with input-rdd or output-rdd if defined,
# set output-rdd as result from merge
# return output

if __name__ == "__main__":
    sc = SparkContext(appName="DIG-FRAMER")
    parser = OptionParser()
    parser.add_option("-r", "--separator", dest="separator", type="string", help="field separator", default="\t")
    parser.add_option("-n", "--numPartitions", dest="numPartitions", type="int", help="number of partitions", default=5)

    (c_options, args) = parser.parse_args()
    frameFilename = args[0]
    rddFilename = args[1]
    outputFilename = args[2]
    if len(args) > 3:
        outputFileFormat = args[3]
    else:
        outputFileFormat = "text"
    type_to_rdd_json_input = open(rddFilename)
    type_to_rdd_json = json.load(type_to_rdd_json_input)
    type_to_rdd_json_input.close()
    frame_input = open(frameFilename)
    frame = json.load(frame_input)
    frame_input.close()
    fileUtil = FileUtil(sc)
    for key, val in type_to_rdd_json.items():
        val["rdd"] = fileUtil.load_json_file(val["path"], val["format"], c_options)
    output_rdd = frame_json(frame, type_to_rdd_json)
    print "Write output to:", outputFilename
    fileUtil.save_json_file(output_rdd, outputFilename, outputFileFormat, c_options)

예제 #3

파일 보기

파일: merger.py 프로젝트: usc-isi-i2/dig-entity-merger

    inputFilename1 = args[0]
    inputFileFormat1 = args[1]
    inputPath = args[2]

    baseFilename = args[3]
    baseFormat = args[4]

    joinResultFilename = args[5]
    joinFormat = args[6]

    outputFilename = args[7]
    outputFileFormat = args[8]

    removeElementsStr = c_options.remove
    removeElements = []
    if len(removeElementsStr) > 0:
        removeElements = removeElementsStr.split(",")

    print "Got options:", c_options, ", " \
                         "input:", inputFilename1, ",", inputFileFormat1, ",", inputPath, \
                         ", base:", baseFilename, ",", baseFormat, ", join:", joinResultFilename

    print "Write output to:", outputFilename
    fileUtil = FileUtil(sc)
    input_rdd1 = fileUtil.load_json_file(inputFilename1, inputFileFormat1, c_options)
    base_rdd = fileUtil.load_json_file(baseFilename, baseFormat, c_options)
    join_rdd = fileUtil.load_json_file(joinResultFilename, joinFormat, c_options)

    result_rdd = EntityMerger.merge_rdds(input_rdd1, inputPath, base_rdd, join_rdd, removeElements, c_options.numPartitions)

    fileUtil.save_json_file(result_rdd, outputFilename, outputFileFormat, c_options)

예제 #4

파일 보기

파일: convertToSeq.py 프로젝트: usc-isi-i2/WEDC

#!/usr/bin/env python

from pyspark import SparkContext

from optparse import OptionParser
from fileUtil import FileUtil

if __name__ == "__main__":
    sc = SparkContext(appName="DIG-TEXT-TO-SEQ")

    usage = "usage: %prog [options] inputDataset outputFilename"
    parser = OptionParser()
    parser.add_option("-r", "--separator", dest="separator", type="string",
                      help="field separator", default="\t")

    (c_options, args) = parser.parse_args()
    print "Got options:", c_options
    inputFilename1 = args[0]
    outputFilename = args[1]

    print "Write output to:", outputFilename
    fileUtil = FileUtil(sc)
    input_rdd = fileUtil.load_json_file(inputFilename1, "text", c_options)

    print "Write output to:", outputFilename
    fileUtil.save_json_file(input_rdd, outputFilename, "sequence", c_options)

예제 #5

파일 보기

파일: deduplicator.py 프로젝트: dingyi567/dig-entity-merger

                        else:
                            seen_objs.add(json.dumps(part))

        return input_json


if __name__ == "__main__":
    sc = SparkContext(appName="DIG-ENTITY_DEDUPLICATOR")

    usage = "usage: %prog [options] inputDataset inputDatasetFormat inputPath " \
            "outputFilename outoutFileFormat"
    parser = OptionParser()
    parser.add_option("-r", "--separator", dest="separator", type="string",
                      help="field separator", default="\t")

    (c_options, args) = parser.parse_args()
    print "Got options:", c_options
    inputFilename = args[0]
    inputFileFormat = args[1]
    inputPath = args[2]

    print "Read ", inputFileFormat, " file from ", inputFilename, " with path:", inputPath
    outputFilename = args[3]
    outputFileFormat = args[4]

    print "Write output to:", outputFilename
    fileUtil = FileUtil(sc)
    input_rdd = fileUtil.load_json_file(inputFilename, inputFileFormat, c_options)
    result_rdd = input_rdd.mapValues(lambda x: EntityDeduplicator().deduplicate(x, inputPath))

    fileUtil.save_json_file(result_rdd, outputFilename, outputFileFormat, c_options)

예제 #6

파일 보기

파일: generateSelfMerge.py 프로젝트: dingyi567/dig-entity-merger

from optparse import OptionParser
from fileUtil import FileUtil
import json

if __name__ == "__main__":
    sc = SparkContext(appName="DIG-ENTITY_MERGER")

    usage = "usage: %prog [options] inputDataset inputDatasetFormat" \
            "outputFilename"
    parser = OptionParser()
    parser.add_option("-r", "--separator", dest="separator", type="string",
                      help="field separator", default="\t")

    (c_options, args) = parser.parse_args()
    inputFilename1 = args[0]
    inputFileFormat1 = args[1]
    outputFilename = args[2]

    print "Got options:", c_options, ",input:", inputFilename1 + ", output:", outputFilename

    fileUtil = FileUtil(sc)
    input_rdd1 = fileUtil.load_json_file(inputFilename1, inputFileFormat1, c_options)

    def write_result(x):
        key = x[0]
        #print "Got key:", key
        return json.dumps({"uri":key, "matches":[{"uri": key}]})

    result = input_rdd1.map(write_result)
    result.saveAsTextFile(outputFilename)