コード例 #1
0
ファイル: parseWikidata.py プロジェクト: reconrus/IP1e
                        dest="chunk_size",
                        default=1024 * 1024 * 1024)
    # Parse the command line and force chunk_size to an int.
    # NOTE(review): the add_argument call for chunk_size is only partly
    # visible here; presumably a value given on the command line arrives as
    # a string, which is why the conversion is needed -- confirm.
    args = parser.parse_args()
    args.chunk_size = int(args.chunk_size)

    # Create the output directory when it does not exist yet.
    # NOTE(review): os.mkdir creates a single directory level only; an
    # output path whose parent is missing raises FileNotFoundError.
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    # Run statistics are appended to logs.txt inside the output directory.
    # NOTE(review): no close() for this handle appears in the visible code.
    logs_file = open(os.path.join(args.output, "logs.txt"), 'a+')

    # One worker process per CPU reported by mp.cpu_count()
    # (mp is presumably the multiprocessing module -- import not visible).
    pool = mp.Pool(mp.cpu_count())
    jobs = []
    start_time = time.time()
    # size() is defined or imported outside this view; presumably it renders
    # the byte count in a human-readable form -- confirm.
    print("Chunks of size %s" % size(args.chunk_size), file=logs_file)
    # Submit one asynchronous process_chunk call per chunk yielded by
    # Chunker.chunkify; jobID is the zero-based index of the chunk.
    for jobID, chunk in enumerate(
            Chunker.chunkify(args.input, size=args.chunk_size)):
        job = pool.apply_async(
            process_chunk,
            (chunk, args.input, args.output, jobID, args.encoding))
        jobs.append(job)

    # NOTE(review): `output` is not used anywhere in the visible code.
    output = []
    # Block until every submitted job has finished. get() re-raises an
    # exception raised inside the worker; return values are discarded.
    for job in jobs:
        job.get()

    # Stop accepting new tasks.
    # NOTE(review): no pool.join() follows in the visible code.
    pool.close()

    # NOTE(review): jobID is bound only by the loop above; if chunkify
    # yields nothing, the next line raises NameError.
    print("Total # of chunks: %d" % (jobID + 1), file=logs_file)
    # Wall-clock time elapsed since start_time, rendered as a timedelta.
    print("Total time: {}".format(
        datetime.timedelta(seconds=time.time() - start_time)),
          file=logs_file)