Example #1
    def __init__(self, func, makedict=1, nthreads=5, inq=None, outq=None, callback=None):
        """Set the function you want to use for extraction and the number of threads.
        The function should take a PIL Image as input and return an array of feature values.
        You can then add filenames to be feature-extracted directly to self.inq.

        If 'makedict' is true (the default), then there will be an instance variable
        called 'featdict' which will be populated with the feature values as they are computed.
        This is a dict mapping fnames (from the inq) to the feature values.
        If you want to later use the 'getmultiple()' function, 'makedict' needs to be true.

        If this is false, then you can read the output queue yourself by calling
        self.outq.get(). Note that if you use multiple threads, outputs are not
        guaranteed to come out in the same order as the inputs.

        You can optionally pass in the input and output queues.
        If you don't, then they are created.

        You can optionally pass in a callback, invoked whenever features are extracted
        for an image. It is called with (fname, feats).
        """
        from Queue import Queue
        # set params
        self.func = func
        self.nthreads = nthreads
        if inq is None: inq = Queue()
        if outq is None: outq = Queue()
        self.inq, self.outq = inq, outq
        self.callback = callback
        # start threads
        self.featxs = spawnWorkers(self.nthreads, featxloop, args=(self.func, self.inq, self.outq, self.callback), interval=0.1)
        if makedict:
            self.featdict = {}
            self.q2dictthread = spawnWorkers(1, q2dict, args=(self.outq, self.featdict))
        else:
            self.featdict = None
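
A minimal usage sketch for the extractor above; PIL, the class, and the module helpers it spawns (featxloop, q2dict, spawnWorkers) are assumed available, and the feature function and filenames are illustrative stand-ins, not from the original module:

def meanfeats(im):
    """Stand-in feature function: per-band mean pixel values of a PIL Image."""
    from PIL import ImageStat
    return ImageStat.Stat(im).mean

fx = FeatureExtractor(meanfeats, makedict=1, nthreads=4)
for fname in ['img1.jpg', 'img2.jpg']:  # hypothetical filenames
    fx.inq.put(fname)
# once the worker threads drain inq, fx.featdict maps each fname to its feature vector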
Example #2
 def __init__(self, args, num=0, **kw):
     """Starts a set of processes, defined by num:
         if num is an int:
             > 0: that many procs
             <= 0: getNumCPUs() - num
         elif num is a float:
             that fraction of this system's CPUs
     Any additional kw args are passed to the initializer for Process().
     (These are the same as the inputs to subprocess.Popen())
     """
     from threading import Lock
     from copy import deepcopy
     from Queue import Queue, LifoQueue
     from nkthreadutils import spawnWorkers
     from nkutils import parseNProcs
     self.nprocs = nprocs = parseNProcs(num)
     self.args = deepcopy(args) # in case we alter them
     # spawn processes and associated locks and working queues
     self.procs = [Process(args, **kw) for i in range(nprocs)]
     self.proclocks = [Lock() for p in self.procs]
     self.working = [LifoQueue() for p in self.procs]
     # spawn instance vars to track inputs and results
     self.inq = Queue()
     self.results = {}
     self.resultlock = Lock()
     # spawn worker threads
     self.inloop = spawnWorkers(1, self.inputloop)[0]
     self.outloop = spawnWorkers(1, self.outputloop)[0]
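
parseNProcs itself isn't shown in these examples; a stand-in that follows the docstring's rules literally might look like the following (a guess at nkutils.parseNProcs, not its actual source):

def parse_nprocs(num, ncpus=8):  # hypothetical stand-in for nkutils.parseNProcs
    """Maps num to a process count per the rules above; ncpus stands in for getNumCPUs()."""
    if isinstance(num, float):
        return max(1, int(ncpus * num))  # e.g., 0.5 -> half the CPUs
    if num > 0:
        return num                       # positive int: exactly that many procs
    return ncpus - num                   # num <= 0: getNumCPUs() - num, so 0 -> all CPUs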
Example #3
def gisttest(modelfname, maxsize=None):
    """Runs gist classification on all fnames passed in through stdin"""
    from nktrainutils import readSVMModelAndParams, bulkclassify
    from nkutils import stdmainloop
    from nkthreadutils import spawnWorkers
    import sys, time
    # read the model
    model = readSVMModelAndParams(modelfname)
    assert model.scales
    fx = FeatureExtractor(lambda im: gistfeatures(im, maxsize=maxsize), makedict=0, nthreads=11)

    # setup the classification, in other threads
    def classifyfunc(fx=fx, model=model, timeout=1):  # default args bind these for the worker threads
        from nkthreadutils import feastOnQueue
        while 1:
            t = time.time()
            els = [(fname, feats) for fname, feats in feastOnQueue(fx.outq, timeout) if feats is not None]
            if els:
                fnames, feats = zip(*els)
                scores = [l*v for l, v in bulkclassify(model, feats)]
                for score, fname in zip(scores, fnames):
                    print '%s\t%s' % (score, fname)
            elapsed = time.time()-t
            if els:
                print >>sys.stderr, 'Classified %d els in %0.3fs, each of len %d' % (len(feats), elapsed, len(feats[0]))
            time.sleep(max(0, timeout-elapsed))

    clsworkers = spawnWorkers(5, classifyfunc)

    # start reading from stdin
    stdmainloop(lambda fname: fx.inq.put(fname))

    time.sleep(10)  # crude grace period so in-flight extractions/classifications can finish
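
feastOnQueue comes from nkthreadutils and its source isn't shown here; judging by how classifyfunc uses it, a function with that contract might be sketched as below (an assumption, not the real implementation):

def feast_on_queue(q, timeout):  # hypothetical stand-in for nkthreadutils.feastOnQueue
    """Waits up to timeout secs for a first item, then drains whatever else is already queued."""
    from Queue import Empty
    items = []
    try:
        items.append(q.get(timeout=timeout))  # block briefly for at least one element
        while 1:
            items.append(q.get_nowait())      # then grab everything ready right now
    except Empty:
        pass
    return items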
Example #4
def ipca_threaded(means, datacallback, keep=-1, dtype=float64, init=None, sonly=0, sparse=0):
    """Iterative PCA, which builds up the scatter matrix iteratively.
    This is a threaded version, which is faster if the datacallback takes some time...
    This iterates over datacallback repeatedly to get chunks of data.
    Each chunk C should be of size ndims X chunk_size (where chunk_size 
    can vary between chunks). A scatter matrix is computed by first 
    subtracting means, and then taking C * C.T. This is added to a running
    total scatter matrix S. Finally, when the datacallback is exhausted
    (e.g., end of generator or list), then PCA is run on S, and eigenvalues are sorted.
    
    Returns (eigvals, eigvecs).
    
    'keep' determines how many dimensions to keep:
        <= 0: all dimensions (default)
        0.0 < keep <= 1.0: keep that fraction of the total variance (must be a float)
        >= 1: given number of dimensions (must be an int)
    
    In all cases, at most len(means) dimensions will be kept.

    You can also pass in an S matrix as initialization (otherwise zeros() is used).
    If sonly is true, then only returns the complete S matrix.
    """
    t1 = time.time()
    ndims = max(means.shape)
    means = means.reshape((ndims,1))
    if init is not None:  # truth-testing a numpy array raises ValueError, so check against None
        assert init.shape[0] == ndims == init.shape[1], 'Given init must have shape (%d, %d) but had shape %s' % (ndims, ndims, init.shape)
        s = init
    else:
        s = zeros((ndims, ndims), dtype=dtype)
    t2 = time.time()
    log('    Done initializing S matrix of shape %s in %0.3f secs...' % (s.shape, t2-t1))
    tot = 0
    num = 0
    from Queue import Queue
    qsize = 5
    q = Queue(qsize)
    from nkthreadutils import spawnWorkers
    def loadq(q, callback):
        for d in callback:
            d -= means
            q.put(d)
        q.put(None)

    datathread = spawnWorkers(1, loadq, args=(q, datacallback))[0]
    i = 0
    while 1:
        d = q.get()
        i += 1
        if d is None: break
        num += d.shape[1]
        t3 = time.time()
        s += dot(d, d.T)
        tot += time.time()-t3
        #log('      On iteration %d of datacallback and got %d total elements, added in %0.3f secs' % (i, num, tot))
    if sonly: return s
    if sparse:
        assert isinstance(keep, int), 'sparse mode requires an integer keep'
        values, vecs = sparseeigs(s, keep)
    else:
        values, vecs = sortedeigs(s, dtype=dtype)
    return prunedims(values, vecs, keep)
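
The invariant behind the loop above is that summing dot(d, d.T) over mean-subtracted chunks gives the same scatter matrix as one batch computation over all columns; a quick numpy check of that identity (plain numpy, independent of the helpers used above):

from numpy import dot, hstack, zeros, allclose
from numpy.random import randn

chunks = [randn(4, 10), randn(4, 7), randn(4, 3)]  # ndims=4, varying chunk_size
means = hstack(chunks).mean(axis=1).reshape((4, 1))
s = zeros((4, 4))
for c in chunks:
    d = c - means
    s += dot(d, d.T)                   # the same update as in the loop above
full = hstack(chunks) - means
assert allclose(s, dot(full, full.T))  # chunked scatter == batch scatter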
Example #5
        The q should contain elements like (func, args, kw, timestamp).
        If a callback is given, it's executed with the same args just before each statement is run."""
        while 1:
            try:
                el = dbdelayq.get()
                func, args, kw, timestamp = el[:4]
                delay = time.time()-timestamp
                delays.append(delay)
                if len(delays) > MAX_DELAYS:
                    delays.pop(0)
                if callback:
                    callback(func, args, kw, timestamp)
                retrysql(func, nretries, *args, **kw)
            except Exception:
                pass  # swallow errors so one bad statement doesn't kill the worker loop
    spawnWorkers(nworkers, dbdelayexec)

    def addDelayedSQL(meth, *args, **kw):
        """Adds a sql statement to execute delayed.
        If you want it to execute immediately (synchronously), you can send immediate=1.
        Note that for debugging, we can easily turn all delayed entries into immediate ones."""
        #TODO perhaps add a backoff sleep() if the lagtime becomes too long
        if debug or kw.get('immediate'):
            kw.pop('immediate', None)
            retrysql(meth, nretries, *args, **kw)
        else:
            dbdelayq.put((meth, args, kw, time.time()))
            if penalty > 0:
                d = delays[-1]
                p = penalty * d
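
Stripped of the SQL and retry details, the pattern in this example is a timestamped work queue whose consumers record how long each item waited; a minimal sketch of that pattern, with illustrative names rather than the original module's:

import time
from Queue import Queue

delayq = Queue()
delays = []                              # recent lag samples, like `delays` above

def delayed_exec():                      # minimal analogue of dbdelayexec
    while 1:
        func, args, kw, ts = delayq.get()
        delays.append(time.time() - ts)  # how long the call sat on the queue
        if len(delays) > 1000:           # bounded history, like MAX_DELAYS
            delays.pop(0)
        func(*args, **kw)                # the real code wraps this in retrysql()

def add_delayed(func, *args, **kw):
    """Queues func for asynchronous execution, stamping the enqueue time."""
    delayq.put((func, args, kw, time.time()))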