Ejemplo n.º 1
0
    def __preprocess_files_by_workers(self,
                                      maxWorkerThreads,
                                      filesToBePreprocessed,
                                      extensionStr,
                                      preprocessModule,
                                      tempFileSeed=None,
                                      options=None,
                                      verbose=False,
                                      parseErrorFiles=None):
        """Preprocess files in chunks, one subprocess command per chunk.

        The file list is split into chunks.  For every chunk a temporary
        list file is written (one file name per line) and a command line
        ``[sys.executable, __file__, preprocessModule.getname(),
        <options...>, '-i', <list file>]`` is built.  The commands are
        handed to ``threadingutil.multithreading_iter`` together with
        ``invoke_subprocess`` and ``maxWorkerThreads``.  All temporary
        files created here are removed before returning, including when
        an error is raised.

        Args:
            maxWorkerThreads: forwarded to
                ``threadingutil.multithreading_iter``; must be >= 2.
            filesToBePreprocessed: iterable of file names; it is copied
                into a list first.
            extensionStr: not used by this method.
            preprocessModule: object whose ``getname()`` result becomes
                the first argument after the script path.
            tempFileSeed: forwarded to ``make_temp_filename``.
            options: iterable of ``(key, value)`` pairs; each pair is
                added to every command as two consecutive arguments.
                ``None`` (the default) means no options.
            verbose: when true the progress reporter is created with the
                number of commands, otherwise with 0.
            parseErrorFiles: optional list.  When given, every command
                gets a ``--parseerrors=<temp file>`` argument, the temp
                file name is appended to the list, and after all commands
                have finished the ``readlines()`` result of each of those
                files is appended as well.

        Raises:
            RuntimeError: a subprocess invocation reported a non-zero
                result.
            SystemExit: with status 2 when a parse-error temp file cannot
                be opened (``fopen`` returned a false value).
        """
        assert maxWorkerThreads >= 2

        filesToBePreprocessed = list(filesToBePreprocessed)
        fileCount = len(filesToBePreprocessed)

        # Aim for roughly 64 chunks, but keep every chunk between 200 and
        # 2000 files.  Floor division keeps the chunk size an integer.
        chunkSize = min(max(200, fileCount // 64), 2000)

        # Flatten the option pairs once instead of once per chunk.
        if options is None:
            options = ()
        optionArgs = []
        for k, v in options:
            optionArgs.append(k)
            optionArgs.append(v)

        commands = []
        tempFiles = []
        # Parse-error temp files created by THIS call.  Kept separately so
        # the read-back loop below never iterates the list it appends to.
        errorFiles = []
        try:
            for fiStart in range(0, fileCount, chunkSize):
                cmd = [sys.executable, __file__, preprocessModule.getname()]
                cmd.extend(optionArgs)

                fn = make_temp_filename(tempFileSeed, self.__syscnv)
                tempFiles.append(fn)

                f = fopen(fn, "wb")
                try:
                    # Slicing clamps at the end of the list.
                    for name in filesToBePreprocessed[fiStart:fiStart + chunkSize]:
                        f.write(name)
                        f.write('\n')
                finally:
                    f.close()

                cmd.append('-i')
                cmd.append(fn)

                if parseErrorFiles is not None:
                    en = make_temp_filename(tempFileSeed, self.__syscnv)
                    # NOTE(review): the temp file name stays in
                    # parseErrorFiles even though the file is deleted
                    # below -- confirm whether callers expect the names.
                    parseErrorFiles.append(en)
                    errorFiles.append(en)
                    tempFiles.append(en)
                    cmd.append("--parseerrors=%s" % en)

                commands.append(cmd)

            if verbose:
                progressBar = utility.ProgressReporter(len(commands))
            else:
                progressBar = utility.ProgressReporter(0)

            # Per-command progress updates are not reported; only done()
            # is called once everything has finished.
            for _index, result in threadingutil.multithreading_iter(
                    invoke_subprocess, commands, maxWorkerThreads):
                if result != 0:
                    raise RuntimeError("error in invocation of subprocess")

            if parseErrorFiles is not None:
                for en in errorFiles:
                    f = fopen(en, "r")
                    if not f:
                        sys.stderr.write(
                            "error: can't open a temporary file '%s'\n" % en)
                        sys.exit(2)
                    try:
                        parseErrorFiles.append(f.readlines())
                    finally:
                        f.close()
        finally:
            # Clean up even when a subprocess failed or we are exiting.
            for fn in tempFiles:
                remove_file_neglecting_error(fn)

        progressBar.done()
Ejemplo n.º 2
0
    usage = "Usage: testthreadingutil.py [NUMWORKER [INPUTSIZE]]"

    numWorker = 4
    inputSize = 30

    if len(sys.argv) >= 2:
        if sys.argv[1] == "-h":
            print usage
            sys.exit(0)
        numWorker = int(sys.argv[1])
    if len(sys.argv) >= 3:
        inputSize = int(sys.argv[2])
    if len(sys.argv) >= 4:
        print usage
        sys.exit(1)

    def genargslist(size):
        for v in xrange(size):
            yield (v, )

    t1 = time.time()

    #for index, result in threadingutil.multithreading_iter(f, [ args for args in genargslist(inputSize) ], numWorker):
    for index, result in threadingutil.multithreading_iter(
            f, genargslist(inputSize), numWorker):
        print "index = ", index, ", result = ", result

    print
    print "NUMWORKER = %d, INPUTSIZE = %d" % (numWorker, inputSize)
    print "elapsed time: %g" % (time.time() - t1)
Ejemplo n.º 3
0
    def __preprocess_files_by_workers(
        self,
        maxWorkerThreads,
        filesToBePreprocessed,
        extensionStr,
        preprocessModule,
        tempFileSeed=None,
        options=None,
        verbose=False,
        parseErrorFiles=None,
    ):
        """Preprocess files in chunks, one subprocess command per chunk.

        The file list is split into chunks.  For every chunk a temporary
        list file is written (one file name per line) and a command line
        ``[sys.executable, __file__, preprocessModule.getname(),
        <options...>, "-i", <list file>]`` is built.  The commands are
        handed to ``threadingutil.multithreading_iter`` together with
        ``invoke_subprocess`` and ``maxWorkerThreads``.  All temporary
        files created here are removed before returning, including when
        an error is raised.

        Args:
            maxWorkerThreads: forwarded to
                ``threadingutil.multithreading_iter``; must be >= 2.
            filesToBePreprocessed: iterable of file names; it is copied
                into a list first.
            extensionStr: not used by this method.
            preprocessModule: object whose ``getname()`` result becomes
                the first argument after the script path.
            tempFileSeed: forwarded to ``make_temp_filename``.
            options: iterable of ``(key, value)`` pairs; each pair is
                added to every command as two consecutive arguments.
                ``None`` (the default) means no options.
            verbose: when true the progress reporter is created with the
                number of commands, otherwise with 0.
            parseErrorFiles: optional list.  When given, every command
                gets a ``--parseerrors=<temp file>`` argument, the temp
                file name is appended to the list, and after all commands
                have finished the ``readlines()`` result of each of those
                files is appended as well.

        Raises:
            RuntimeError: a subprocess invocation reported a non-zero
                result.
            SystemExit: with status 2 when a parse-error temp file cannot
                be opened (``fopen`` returned a false value).
        """
        assert maxWorkerThreads >= 2

        filesToBePreprocessed = list(filesToBePreprocessed)
        fileCount = len(filesToBePreprocessed)

        # Aim for roughly 64 chunks, but keep every chunk between 200 and
        # 2000 files.  Floor division keeps the chunk size an integer.
        chunkSize = min(max(200, fileCount // 64), 2000)

        # Flatten the option pairs once instead of once per chunk.
        if options is None:
            options = ()
        optionArgs = []
        for k, v in options:
            optionArgs.append(k)
            optionArgs.append(v)

        commands = []
        tempFiles = []
        # Parse-error temp files created by THIS call.  Kept separately so
        # the read-back loop below never iterates the list it appends to.
        errorFiles = []
        try:
            for fiStart in range(0, fileCount, chunkSize):
                cmd = [sys.executable, __file__, preprocessModule.getname()]
                cmd.extend(optionArgs)

                fn = make_temp_filename(tempFileSeed, self.__syscnv)
                tempFiles.append(fn)

                f = fopen(fn, "wb")
                try:
                    # Slicing clamps at the end of the list.
                    for name in filesToBePreprocessed[fiStart : fiStart + chunkSize]:
                        f.write(name)
                        f.write("\n")
                finally:
                    f.close()

                cmd.append("-i")
                cmd.append(fn)

                if parseErrorFiles is not None:
                    en = make_temp_filename(tempFileSeed, self.__syscnv)
                    # NOTE(review): the temp file name stays in
                    # parseErrorFiles even though the file is deleted
                    # below -- confirm whether callers expect the names.
                    parseErrorFiles.append(en)
                    errorFiles.append(en)
                    tempFiles.append(en)
                    cmd.append("--parseerrors=%s" % en)

                commands.append(cmd)

            if verbose:
                progressBar = utility.ProgressReporter(len(commands))
            else:
                progressBar = utility.ProgressReporter(0)

            # Per-command progress updates are not reported; only done()
            # is called once everything has finished.
            for _index, result in threadingutil.multithreading_iter(invoke_subprocess, commands, maxWorkerThreads):
                if result != 0:
                    raise RuntimeError("error in invocation of subprocess")

            if parseErrorFiles is not None:
                for en in errorFiles:
                    f = fopen(en, "r")
                    if not f:
                        sys.stderr.write("error: can't open a temporary file '%s'\n" % en)
                        sys.exit(2)
                    try:
                        parseErrorFiles.append(f.readlines())
                    finally:
                        f.close()
        finally:
            # Clean up even when a subprocess failed or we are exiting.
            for fn in tempFiles:
                remove_file_neglecting_error(fn)

        progressBar.done()
Ejemplo n.º 4
0
if __name__ == '__main__':
    usage = "Usage: testthreadingutil.py [NUMWORKER [INPUTSIZE]]"
    
    numWorker = 4
    inputSize = 30
    
    if len(sys.argv) >= 2:
        if sys.argv[1] == "-h":
            print usage
            sys.exit(0)
        numWorker = int(sys.argv[1])
    if len(sys.argv) >= 3:
        inputSize = int(sys.argv[2])
    if len(sys.argv) >= 4:
        print usage
        sys.exit(1)
    
    def genargslist(size):
        for v in xrange(size):
            yield ( v, )
    
    t1 = time.time()
    
    #for index, result in threadingutil.multithreading_iter(f, [ args for args in genargslist(inputSize) ], numWorker):
    for index, result in threadingutil.multithreading_iter(f, genargslist(inputSize), numWorker):
        print "index = ", index, ", result = ", result
    
    print
    print "NUMWORKER = %d, INPUTSIZE = %d" % ( numWorker, inputSize )
    print "elapsed time: %g" % (time.time() - t1)