Example #1
0
    def check_brick_done(brick):
        """Finalize *brick* once all of its blobs are done or cancelled.

        Prints a progress line for the brick at most once every
        ``brick_status_period`` seconds.  When the number of stored results
        plus the number of cancelled blobs reaches the blob count returned by
        ``get_brick_nblobs``, this writes the final checkpoint, sets the QDO
        task state to Succeeded, drops the brick from ``allresults`` and puts
        ``(brick, number_of_results)`` on ``finished_bricks``.

        Returns early (after the optional progress line) while the blob count
        is ``None`` or the brick is still incomplete.
        """
        nblobs, taskid = get_brick_nblobs(brick)
        ncancelled = len(brick_cancelled.get(brick, ()))
        ndone = len(allresults.get(brick, ()))

        # Rate-limited progress report; the first sighting of a brick only
        # starts its timer.
        tnow = time.time()
        if brick not in last_brick_status:
            last_brick_status[brick] = tnow
        if tnow - last_brick_status[brick] > brick_status_period:
            print('Brick', brick, ':', ncancelled, 'cancelled,', ndone,
                  'done, total',
                  (nblobs if nblobs is not None else '(unknown)'))
            last_brick_status[brick] = tnow

        if nblobs is None:
            return
        if ndone + ncancelled < nblobs:
            return

        # Done this brick!  Set qdo state=Succeeded
        checkpoint_fn = opt.checkpoint % dict(brick=brick)
        # A brick whose blobs were all cancelled has no entry in allresults,
        # so read it with a default rather than indexing (which would raise
        # KeyError here and again at the cleanup below).
        brickresults = allresults.get(brick, {})
        R = [
            dict(brickname=brick, iblob=iblob, result=res)
            for iblob, res in brickresults.items()
        ]
        print('Writing final checkpoint', checkpoint_fn)
        _write_checkpoint(R, checkpoint_fn)
        print('Setting QDO task to Succeeded:', brick)
        q.set_task_state(taskid, qdo.Task.SUCCEEDED)
        # Only forget the results after the checkpoint and state update
        # succeeded, as before.
        allresults.pop(brick, None)
        finished_bricks.put((brick, len(R)))
Example #2
0
 def check_brick_done(brick):
     """Finalize *brick* once all of its blobs are done or cancelled.

     Does nothing while ``get_brick_nblobs`` reports a blob count of ``None``
     or while stored results plus cancelled blobs are fewer than that count.
     Otherwise writes the final checkpoint, sets the QDO task state to
     Succeeded, drops the brick from ``allresults`` and puts
     ``(brick, number_of_results)`` on ``finished_bricks``.
     """
     nblobs, taskid = get_brick_nblobs(brick)
     if nblobs is None:
         return
     ncancelled = len(brick_cancelled.get(brick, ()))
     # The brick may have no stored results yet (e.g. only cancellations have
     # been seen), so look it up with a default instead of indexing.
     brickresults = allresults.get(brick, {})
     if len(brickresults) + ncancelled < nblobs:
         return
     # Done this brick!  Set qdo state=Succeeded
     checkpoint_fn = opt.checkpoint % dict(brick=brick)
     R = [
         dict(brickname=brick, iblob=iblob, result=res)
         for iblob, res in brickresults.items()
     ]
     print('Writing final checkpoint', checkpoint_fn)
     _write_checkpoint(R, checkpoint_fn)
     print('Setting QDO task to Succeeded:', brick)
     q.set_task_state(taskid, qdo.Task.SUCCEEDED)
     # Forget the results only after the checkpoint and state update
     # succeeded; tolerate a brick that never had an entry.
     allresults.pop(brick, None)
     finished_bricks.put((brick, len(R)))
Example #3
0
def output_thread(queuename, outqueue, checkpointqueue, blobsizes,
                  finished_bricks, opt):
    """Collect blob results, checkpoint them, and mark finished bricks.

    Runs forever.  On each pass of the main loop it:

    1. writes an interim checkpoint for every brick whose number of results
       changed since its last interim checkpoint, provided more than
       ``opt.checkpoint_period`` seconds have elapsed since the previous
       round;
    2. drains ``checkpointqueue`` of ``(brick, iblob, result)`` tuples and
       checks each affected brick for completion;
    3. waits up to 60 seconds for one ``(brick, iblob, msg)`` tuple from
       ``outqueue``, where ``msg`` is ``'cancel'``, ``None`` (ignored), or a
       pickled result, then checks that brick for completion.

    A brick is complete when its stored results plus cancelled blobs reach
    the blob count received via ``blobsizes``; the final checkpoint is then
    written, the QDO task is set to Succeeded, and ``(brick, n_results)`` is
    put on ``finished_bricks``.

    Args:
        queuename: name passed to ``qdo.connect``.
        outqueue: queue of ``(brick, iblob, msg)`` tuples.
        checkpointqueue: queue of ``(brick, iblob, result)`` tuples.
        blobsizes: queue of ``(brick, nblobs, qdo_taskid)`` tuples.
        finished_bricks: queue receiving ``(brick, n_results)`` tuples.
        opt: options object; ``opt.checkpoint`` is a %-format template
            taking a ``brick`` key, ``opt.checkpoint_period`` is compared
            against elapsed seconds.
    """
    try:
        import setproctitle
        setproctitle.setproctitle('farm: output')
    except Exception:
        # Best effort only: the process title is cosmetic.
        pass

    import qdo
    q = qdo.connect(queuename)

    # brick -> {iblob: result}
    allresults = {}

    # Stored values from the 'blobsizes' queue.
    # brick -> (nblobs, qdo_taskid)
    brick_info = {}

    # Local mapping of brickname -> [set of cancelled blob ids]
    brick_cancelled = {}

    def get_brick_nblobs(brick, defnblobs=None):
        """Return ``(nblobs, taskid)`` for *brick*, or ``(defnblobs, None)``.

        If the brick is not known yet, first drains everything currently
        available on ``blobsizes`` into ``brick_info``.
        """
        if brick not in brick_info:
            try:
                while True:
                    br, nb, tid = blobsizes.get(block=False)
                    brick_info[br] = (nb, tid)
            except queue.Empty:
                pass
        return brick_info.get(brick, (defnblobs, None))

    def checkpoint_rows(brick, brickresults):
        """Build the list of per-blob records written to a checkpoint."""
        return [
            dict(brickname=brick, iblob=iblob, result=res)
            for iblob, res in brickresults.items()
        ]

    def check_brick_done(brick):
        """Finalize *brick* if all of its blobs are done or cancelled."""
        nblobs, taskid = get_brick_nblobs(brick)
        if nblobs is None:
            return
        ncancelled = len(brick_cancelled.get(brick, ()))
        # A 'cancel' can arrive before any result for the brick, in which
        # case the brick has no entry in allresults; indexing would raise
        # KeyError and kill this thread.
        brickresults = allresults.get(brick, {})
        if len(brickresults) + ncancelled < nblobs:
            return
        # Done this brick!  Set qdo state=Succeeded
        checkpoint_fn = opt.checkpoint % dict(brick=brick)
        R = checkpoint_rows(brick, brickresults)
        print('Writing final checkpoint', checkpoint_fn)
        _write_checkpoint(R, checkpoint_fn)
        print('Setting QDO task to Succeeded:', brick)
        q.set_task_state(taskid, qdo.Task.SUCCEEDED)
        # Forget the results only once the checkpoint and state update
        # have succeeded.
        allresults.pop(brick, None)
        finished_bricks.put((brick, len(R)))

    last_checkpoint = time.time()
    # brick -> number of results at its last interim checkpoint
    last_checkpoint_size = {}

    while True:
        tnow = time.time()
        if tnow - last_checkpoint > opt.checkpoint_period:
            for brick, brickresults in allresults.items():
                nresults = len(brickresults)
                # Skip bricks with no new results since the last checkpoint.
                if last_checkpoint_size.get(brick) == nresults:
                    continue
                checkpoint_fn = opt.checkpoint % dict(brick=brick)
                R = checkpoint_rows(brick, brickresults)
                last_checkpoint_size[brick] = nresults
                nblobs, _ = get_brick_nblobs(brick, '(unknown)')
                print('Writing interim checkpoint', checkpoint_fn, ':',
                      nresults, 'of', nblobs, 'results')
                _write_checkpoint(R, checkpoint_fn)
            last_checkpoint = tnow

        # Read any checkpointed results sent by the input thread
        c = Counter()
        while True:
            try:
                (brick, iblob, res) = checkpointqueue.get(block=False)
            except queue.Empty:
                break
            allresults.setdefault(brick, {})[iblob] = res
            c[brick] += 1
        for brick, _ in c.most_common():
            check_brick_done(brick)

        try:
            brick, iblob, msg = outqueue.get(timeout=60)
        except queue.Empty:
            # timeout
            continue

        if msg == 'cancel':
            brick_cancelled.setdefault(brick, set()).add(iblob)
            debug('Output thread: got cancel for brick', brick, 'blob', iblob)

        else:
            if msg is None:
                # short-cut empty work packet.
                continue
            # Worker sent a blob result.
            # NOTE(review): pickle.loads executes arbitrary code from the
            # payload; this is only safe if every worker is trusted.
            result = pickle.loads(msg)
            if result is None:
                # FIXME: unclear when a worker sends a pickled None; such a
                # message is dropped without being counted as done.
                continue
            allresults.setdefault(brick, {})[iblob] = result

        check_brick_done(brick)