# Example 1
def getsharedsequencefactory(startvalue, intervallen=5000):
    """ Creates a factory for parallel readers of a sequence.

        Returns a callable f. When f() is called, it returns a callable g.
        Whenever g(*args) is called, it returns a unique int from a sequence
        (if several g's are created, the order of the calls may lead to that
        the returned ints are not ordered, but they will be unique). The
        arguments to g are ignored, but accepted. Thus g can be used as
        idfinder for [Decoupled]Dimensions.

        The different g's can be used safely from different processes and
        threads.

        Arguments:

        - startvalue: The first value to return. If None, 0 is assumed.
        - intervallen: The amount of numbers that a single g from above
          can return before synchronization is needed to get a new amount.
          Default: 5000.
    """
    if startvalue is None:
        startvalue = 0

    # The bounded queue guarantees that each interval is handed out to
    # exactly one deliverer.
    values = multiprocessing.Queue(10)

    # Producer process: keeps the queue filled with fresh, disjoint
    # [low, high) intervals.
    def valuegenerator(current):
        sys.excepthook = _getexcepthook()
        while True:
            values.put((current, current + intervallen))
            current += intervallen

    producer = multiprocessing.Process(target=valuegenerator,
                                       args=(startvalue,))
    producer.daemon = True
    producer.start()

    # Generator that exhausts one interval from the queue before
    # fetching the next one.
    def valuedeliverer():
        while True:
            (low, high) = values.get()
            for number in range(low, high):
                yield number

    # Factory for the callable handed to the end-consumer.
    def factory():
        source = valuedeliverer()  # a generator private to this consumer

        # The g described in the docstring; arguments are accepted but
        # ignored so it fits the idfinder interface.
        def getnextseqval(*ignored):
            return next(source)

        return getnextseqval

    return factory
# Example 2
def _getexitfunction():
    """Return a function that halts the execution of pygrametl.

       pygrametl uses the function as excepthook in spawned processes such that
       an uncaught exception halts the entire execution.
    """
    # Jython runs everything inside a single JVM, so System.exit is enough.
    if sys.platform.startswith('java'):
        def javaexitfunction():
            import java.lang.System
            java.lang.System.exit(1)
        return javaexitfunction

    # On UNIX-like platforms the os module can signal the whole process
    # group in one call.
    import signal
    if hasattr(os, 'getpgrp') and hasattr(os, 'killpg'):
        def unixexitfunction():
            os.killpg(os.getpgrp(), signal.SIGTERM)
        return unixexitfunction

    # Fallback for platforms without process-group kills: a dedicated
    # "terminator" process collects the pids of every caller of this
    # function and, on request, kills them all.

    # Lazily start the terminator (only once per master process).
    global _toterminator
    if _toterminator is None:
        _toterminator = multiprocessing.Queue()

        def terminatorfunction():
            knownpids = set([_masterpid])
            while True:
                msg = _toterminator.get()
                if isinstance(msg, int):
                    # A pid to remember
                    knownpids.add(msg)
                else:
                    # Anything else means "kill everything we know about";
                    # signal 9 is used since available signals vary here.
                    for pid in knownpids:
                        os.kill(pid, 9)
                    return

        terminatorprocess = multiprocessing.Process(target=terminatorfunction)
        terminatorprocess.daemon = True
        terminatorprocess.start()

    # Register the calling process with the terminator.
    _toterminator.put(os.getpid())

    # The returned function just asks the terminator to kill all
    # registered processes.
    def exitfunction():
        _toterminator.put('TERMINATE')

    return exitfunction
# Example 3
def shareconnectionwrapper(targetconnection, maxclients=10, userfuncs=()):
    """Share a ConnectionWrapper between several processes/threads.

    When Decoupled objects are used, they can try to update the DW at the same
    time. They can use several ConnectionWrappers to avoid race conditions, but
    this is not transactionally safe. Instead, they can use a "shared"
    ConnectionWrapper obtained through this function.

    When a ConnectionWrapper is shared, it is executing in a separate process
    (or thread, in case Jython is used) and ensuring that only one operation
    takes place at the time. This is hidden from the users of the shared
    ConnectionWrapper.  They see an interface similar to the normal
    ConnectionWrapper.

    When this method is called, it returns a SharedConnectionWrapperClient
    which can be used as a normal ConnectionWrapper. Each process
    (i.e., each Decoupled object) should, however, get a unique
    SharedConnectionWrapperClient by calling copy() on the returned
    SharedConnectionWrapperClient.

    Note that a shared ConnectionWrapper needs to hold the complete result of
    each query in memory until it is fetched by the process that executed the
    query. Again, this is hidden from the users.

    It is also possible to add methods to a shared ConnectionWrapper when it
    is created. When this is done and the method is invoked, no other
    operation will modify the DW at the same time. If, for example,
    the functions foo and bar are added to a shared ConnectionWrapper (by
    passing the argument userfuncs=(foo, bar) to shareconnectionwrapper),
    the returned SharedConnectionWrapperClient will offer the methods
    foo and bar which when called will be running in the separate process
    for the shared ConnectionWrapper. This is particularly useful for
    user-defined bulk loaders as used by BulkFactTable:

    def bulkload():
        # DBMS-specific code here.
        # No other DW operation should take place concurrently

    scw = shareconnectionwrapper(ConnectionWrapper(...), userfuncs=(bulkload,))
    facttbl = BulkFact(..., bulkloader=scw.copy().bulkload) #Note the .copy().

    Arguments:
    - targetconnection: a pygrametl ConnectionWrapper
    - maxclients: the maximum number of concurrent clients. Default: 10
    - userfuncs: a sequence of functions to add to the shared
    ConnectionWrapper. Default: ()
    """
    # One joinable queue carries requests to the server; each client gets
    # its own queue for replies, identified by a "line" number.
    toserver = multiprocessing.JoinableQueue(5000)
    toclients = [multiprocessing.Queue() for _ in range(maxclients)]
    freelines = multiprocessing.Queue()
    for i in range(maxclients):
        freelines.put(i)
    serverCW = SharedConnectionWrapperServer(targetconnection, toserver,
                                             toclients)
    userfuncnames = []
    for func in userfuncs:
        # func.__name__ works on both Python 2 and 3 (unlike func_name,
        # which was removed in Python 3).
        funcname = getattr(func, '__name__', None)
        if not (callable(func) and funcname and funcname != '<lambda>'):
            raise ValueError(
                "Elements in userfuncs must be callable and named")
        if hasattr(SharedConnectionWrapperClient, funcname):
            # Reject names that would shadow the client's own methods
            raise ValueError("Illegal function name: " + funcname)
        setattr(serverCW, '_userfunc_' + funcname, func)
        userfuncnames.append(funcname)
    serverprocess = multiprocessing.Process(target=serverCW.worker)
    serverprocess.name = 'Process for shared connection wrapper'
    # Daemonized so a hanging server does not prevent interpreter exit
    serverprocess.daemon = True
    serverprocess.start()
    module = targetconnection.getunderlyingmodule()
    clientCW = SharedConnectionWrapperClient(toserver, toclients, freelines,
                                             module, userfuncnames)
    return clientCW