Ejemplo n.º 1
0
def terminateNcsScInstances(authToken, instanceIds):
    '''try to terminate instances; return list of instances terminated (empty if none confirmed)'''
    terminationLogFilePath = os.path.join(
        dataDirPath, 'badTerminations.log')  # using global dataDirPath
    dateTimeStr = datetime.datetime.now(datetime.timezone.utc).isoformat()
    try:
        ncs.terminateInstances(authToken, instanceIds)
        logger.info('terminateInstances returned')
    except Exception as exc:
        logger.warning('got exception terminating %d instances (%s) %s',
                       len(instanceIds), type(exc), exc)
        try:
            with open(terminationLogFilePath, 'a') as logFile:
                for iid in instanceIds:
                    print(dateTimeStr, iid, sep=',', file=logFile)
        except:
            logger.warning(
                'got exception (%s) appending to terminationLogFile %s',
                type(exc), terminationLogFilePath)
        return []  # empty list meaning none may have been terminated
    else:
        return instanceIds
Ejemplo n.º 2
0
def recruitAndRender():
    '''a threadproc to recruit an instance,render frames on it, and terminate it'''
    eLoop = asyncio.new_event_loop()
    asyncio.set_event_loop( eLoop )
    
    randomPart = str( uuid.uuid4() )[0:13]
    launchedJsonFilePath = dataDirPath+'/recruitLaunched_' + randomPart + '.json'
    resultsLogFilePath = dataDirPath+'/recruitInstance_' + randomPart + '.jlog'
    try:
        instance = recruitInstance( launchedJsonFilePath, resultsLogFilePath )
        #instances = recruitInstances( 1, launchedJsonFilePath, True, resultsLogFilePath )
    except Exception as exc:
        logger.info( 'got exception from recruitInstance (%s) %s', type(exc), exc )
        return -13
    if not instance:
        logger.warning( 'no good instance from recruit')
        return -14
    else:
        renderFramesOnInstance( instance )
        #return renderFramesOnInstance( instances[0] )
        iid = instance['instanceId']
        logOperation( 'terminateFinal', [iid], '<master>' )
        ncs.terminateInstances( args.authToken, [iid] )
Ejemplo n.º 3
0
def renderFramesOnInstance( inst ):
    timeLimit = min( args.frameTimeLimit, args.timeLimit )
    rsyncTimeLimit = min( 18000, timeLimit )  # was 240; have used 1800 for big files
    iid = inst['instanceId']
    abbrevIid = iid[0:16]
    g_workingInstances.append( iid )
    saveProgress()
    logger.info( 'would render frames on instance %s', abbrevIid )

    # rsync the blend file, with standardized dest file name
    logFrameState( -1, 'rsyncing', iid, 0 )
    blendFileName = 'render.blend'  # was SpinningCube_002_c.blend 'cube0c.blend'
    (rc, stderr) = rsyncToRemote( args.blendFilePath, blendFileName, inst, timeLimit=rsyncTimeLimit )
    if rc == 0:
        logFrameState( -1, 'rsynced', iid )
    else:
        logStderr( stderr.rstrip(), iid )
        logFrameState( -1, 'rsyncFailed', iid, rc )
        logger.warning( 'rc from rsync was %d', rc )
        logOperation( 'terminateFailedWorker', iid, '<master>')
        ncs.terminateInstances( args.authToken, [iid] )
        g_workingInstances.remove( iid )
        saveProgress()
        return -1  # go no further if we can't rsync to the worker

    def trackStderr( proc ):
        for line in proc.stderr:
            print( '<stderr>', abbrevIid, line.strip(), file=sys.stderr )
            logStderr( line.rstrip(), iid )

    def trackStdout( proc ):
        nonlocal frameProgress
        for line in proc.stdout:
            #print( '<stdout>', abbrevIid, line.strip(), file=sys.stderr )
            if 'Path Tracing Tile' in line:
                pass
                # yes, this progress-parsing code does work
                pat = r'Path Tracing Tile ([0-9]+)/([0-9]+)'
                match = re.search( pat, line )
                if match:
                    frameProgress = float( match.group(1) ) / float( match.group(2) )
            elif '| Updating ' in line:
                pass
            elif '| Synchronizing object |' in line:
                pass
            elif line.strip():
                print( '<stdout>', abbrevIid, line.strip(), file=sys.stderr )
                logStdout( line.rstrip(), iid )
    nFailures = 0    
    while len( g_framesFinished) < g_nFramesWanted:
        if sigtermSignaled():
            break
        if time.time() >= g_deadline:
            logger.warning( 'exiting thread because global deadline has passed' )
            break

        if nFailures >= 3:
            logger.warning( 'exiting thread because instance has encountered %d failures', nFailures )
            logOperation( 'terminateFailedWorker', iid, '<master>')
            ncs.terminateInstances( args.authToken, [iid] )
            break
        #logger.info( '%s would claim a frame; %d done so far', abbrevIid, len( g_framesFinished) )
        try:
            frameNum = g_framesToDo.popleft()
        except IndexError:
            #logger.info( 'empty g_framesToDo' )
            time.sleep(10)
            overageFactor = 3
            nUnfinished = g_nFramesWanted - len(g_framesFinished)
            nWorkers = len( g_workingInstances )
            if nWorkers > (nUnfinished * overageFactor):
                logger.warning( 'exiting thread because not many left to do (%d unfinished, %d workers)',
                    nUnfinished, nWorkers )
                logOperation( 'terminateExcessWorker', iid, '<master>')
                g_workingInstances.remove( iid )
                ncs.terminateInstances( args.authToken, [iid] )
                break
            continue

        frameDetails = { 'frameNum': frameNum, 'elapsedTime': 0, 'progress': 0 }
        frameDetails[ 'lastDateTime' ] = datetime.datetime.now(datetime.timezone.utc).isoformat()
        g_.frameDetails[ frameNum ] = frameDetails

        outFileName = g_outFilePattern.replace( '######', '%06d' % frameNum )
        returnCode = None
        pyExpr = ''
        if args.width > 0 and args.height > 0:
            pyExpr = '--python-expr "import bpy; scene=bpy.context.scene; '\
                'scene.render.resolution_x=%d; scene.render.resolution_y=%d; '\
                'scene.render.resolution_percentage=100"' % (args.width, args.height)
        #cmd = 'blender -b -noaudio --version'
        cmd = 'blender -b -noaudio --enable-autoexec %s %s -o %s --render-format %s -f %d' % \
            (blendFileName, pyExpr, g_outFilePattern, args.frameFileType, frameNum)

        logger.info( 'commanding %s', cmd )
        sshSpecs = inst['ssh']

        curFrameRendered = False
        logFrameState( frameNum, 'starting', iid )
        frameStartDateTime = datetime.datetime.now(datetime.timezone.utc)
        with subprocess.Popen(['ssh',
                            '-p', str(sshSpecs['port']),
                            '-o', 'ServerAliveInterval=360',
                            '-o', 'ServerAliveCountMax=3',
                            sshSpecs['user'] + '@' + sshSpecs['host'], cmd],
                            encoding='utf8',
                            stdout=subprocess.PIPE,  # subprocess.PIPE subprocess.DEVNULL
                            stderr=subprocess.PIPE) as proc:
            frameProgress = 0
            frameProgressReported = 0
            deadline = min( g_deadline, time.time() + timeLimit )
            stdoutThr = threading.Thread(target=trackStdout, args=(proc,))
            stdoutThr.start()
            stderrThr = threading.Thread(target=trackStderr, args=(proc,))
            stderrThr.start()
            while time.time() < deadline:
                proc.poll() # sets proc.returncode
                if proc.returncode == None:
                    if frameProgress > min( .99, frameProgressReported + .01 ):
                        logger.info( 'frame %d on %s is %.1f %% done', frameNum, abbrevIid, frameProgress*100 )
                        frameProgressReported = frameProgress
                        rightNow = datetime.datetime.now(datetime.timezone.utc)
                        frameDetails[ 'lastDateTime' ] = rightNow.isoformat()
                        frameDetails[ 'elapsedTime' ] = (rightNow - frameStartDateTime).total_seconds()
                        frameDetails[ 'progress' ] = frameProgress
                        saveProgress()
                    if ((deadline - time.time() < timeLimit/2)) and frameProgress < .5:
                        logger.warning( 'frame %d on %s seems slow', frameNum, abbrevIid )
                        logFrameState( frameNum, 'seemsSlow', iid, frameProgress )
                else:
                    if proc.returncode == 0:
                        logger.info( 'frame %d on %s succeeded', frameNum, abbrevIid )
                        curFrameRendered = True
                    else:
                        logger.warning( 'instance %s gave returnCode %d', abbrevIid, proc.returncode )
                    break
                if sigtermSignaled():
                    break
                time.sleep(10)
            returnCode = proc.returncode if proc.returncode != None else 124
            if returnCode:
                logger.warning( 'renderFailed for frame %d on %s', frameNum, iid )
                logFrameState( frameNum, 'renderFailed', iid, returnCode )
                frameDetails[ 'progress' ] = 0
                g_framesToDo.append( frameNum )
                saveProgress()
                time.sleep(10) # maybe we should retire this instance; at least, making it sleep so it is less competitive
            else:
                logFrameState( frameNum, 'rendered', iid )
                #g_framesFinished.append( frameNum )  # too soon

            proc.terminate()
            try:
                proc.wait(timeout=5)
                if proc.returncode:
                    logger.warning( 'ssh return code %d', proc.returncode )
            except subprocess.TimeoutExpired:
                logger.warning( 'ssh did not terminate in time' )
            stdoutThr.join()
            stderrThr.join()
        # may not need this logging here
        if returnCode != 0:
            logger.warning( 'blender returnCode %d for %s', returnCode, abbrevIid )
        if curFrameRendered:
            logFrameState( frameNum, 'retrieving', iid )
            (returnCode, stderr) = scpFromRemote( 
                outFileName, os.path.join( dataDirPath, outFileName ), inst
                )
            if returnCode == 0:
                logFrameState( frameNum, 'retrieved', iid )
                logger.info( 'retrieved frame %d', frameNum )
                logger.info( 'finished %d frames out of %d', len( g_framesFinished), g_nFramesWanted )
                g_framesFinished.append( frameNum )
                rightNow = datetime.datetime.now(datetime.timezone.utc)
                frameDetails[ 'lastDateTime' ] = rightNow.isoformat()
                frameDetails[ 'elapsedTime' ] = (rightNow - frameStartDateTime).total_seconds()
                frameDetails[ 'progress' ] = 1.0
            else:
                logStderr( stderr.rstrip(), iid )
                logFrameState( frameNum, 'retrieveFailed', iid, returnCode )
                frameDetails[ 'progress' ] = 0
                g_framesToDo.append( frameNum )
            saveProgress()
        if returnCode:
            nFailures += 1
    if iid in g_workingInstances:
        g_workingInstances.remove( iid )
        saveProgress()
    return 0
Ejemplo n.º 4
0
def recruitInstances( nWorkersWanted, launchedJsonFilePath, launchWanted, resultsLogFilePath='' ):
    '''launch instances and install blender on them;
        terminate those that could not install; return list of good instances'''
    logger.info( 'recruiting up to %d instances', nWorkersWanted )
    if not resultsLogFilePath:
        resultsLogFilePath = dataDirPath+'/recruitInstances.jlog'
    goodInstances = []
    if launchWanted:
        logger.info( 'recruiting %d instances', nWorkersWanted )
        nAvail = ncs.getAvailableDeviceCount( args.authToken, filtersJson=args.filter )
        if nWorkersWanted > (nAvail + 0):
            logger.error( 'not enough devices available (%d requested, %d avail)', nWorkersWanted, nAvail )
            raise ValueError( 'not enough devices available')
        # prepare sshClientKey for launch
        if args.sshClientKeyName:
            sshClientKeyName = args.sshClientKeyName
        else:
            keyContents = loadSshPubKey().strip()
            randomPart = str( uuid.uuid4() )[0:13]
            #keyContents += ' #' + randomPart
            sshClientKeyName = 'bfr_%s' % (randomPart)
            respCode = ncs.uploadSshClientKey( args.authToken, sshClientKeyName, keyContents )
            if respCode < 200 or respCode >= 300:
                logger.warning( 'ncs.uploadSshClientKey returned %s', respCode )
                sys.exit( 'could not upload SSH client key')
        #launch
        #logResult( 'operation', {'launchInstances': nWorkersWanted}, '<recruitInstances>' )
        logOperation( 'launchInstances', nWorkersWanted, '<recruitInstances>' )
        rc = launchInstances( args.authToken, nWorkersWanted,
            sshClientKeyName, launchedJsonFilePath, filtersJson=args.filter,
            encryptFiles = args.encryptFiles
            )
        if rc:
            logger.debug( 'launchInstances returned %d', rc )
        # delete sshClientKey only if we just uploaded it
        if sshClientKeyName != args.sshClientKeyName:
            logger.info( 'deleting sshClientKey %s', sshClientKeyName)
            ncs.deleteSshClientKey( args.authToken, sshClientKeyName )
    launchedInstances = []
    # get instances from the launched json file
    with open( launchedJsonFilePath, 'r') as jsonInFile:
        try:
            launchedInstances = json.load(jsonInFile)  # an array
        except Exception as exc:
            logger.warning( 'could not load json (%s) %s', type(exc), exc )
    if len( launchedInstances ) < nWorkersWanted:
        logger.warning( 'could not launch as many instances as wanted (%d vs %d)',
            len( launchedInstances ), nWorkersWanted )
    nonstartedIids = [inst['instanceId'] for inst in launchedInstances if inst['state'] != 'started' ]
    if nonstartedIids:
        logger.warning( 'terminating non-started instances %s', nonstartedIids )
        ncs.terminateInstances( args.authToken, nonstartedIids )
    # proceed with instances that were actually started
    startedInstances = [inst for inst in launchedInstances if inst['state'] == 'started' ]
    # add instances to knownHosts
    with open( os.path.expanduser('~/.ssh/known_hosts'), 'a' ) as khFile:
        jsonToKnownHosts.jsonToKnownHosts( startedInstances, khFile )
    # install blender on startedInstances
    if not sigtermSignaled():
        installerCmd = 'sudo apt-get -qq update && sudo apt-get -qq -y install blender > /dev/null'
        logger.info( 'calling tellInstances to install on %d instances', len(startedInstances))
        stepStatuses = tellInstances.tellInstances( startedInstances, installerCmd,
            resultsLogFilePath=resultsLogFilePath,
            download=None, downloadDestDir=None, jsonOut=None, sshAgent=args.sshAgent,
            timeLimit=min(args.instTimeLimit, args.timeLimit), upload=None, stopOnSigterm=not True,
            knownHostsOnly=True
            )
        # SHOULD restore our handler because tellInstances may have overridden it
        #signal.signal( signal.SIGTERM, sigtermHandler )
        if not stepStatuses:
            logger.warning( 'no statuses returned from installer')
            startedIids = [inst['instanceId'] for inst in startedInstances]
            logOperation( 'terminateBad', startedIids, '<recruitInstances>' )
            ncs.terminateInstances( args.authToken, startedIids )
            return []
        (goodOnes, badOnes) = triage( stepStatuses )
        #stepTiming.finish()
        #eventTimings.append(stepTiming)
        logger.info( '%d good installs, %d bad installs', len(goodOnes), len(badOnes) )
        logger.info( 'stepStatuses %s', stepStatuses )
        goodInstances = [inst for inst in startedInstances if inst['instanceId'] in goodOnes ]
        badIids = []
        for status in badOnes:
            badIids.append( status['instanceId'] )
        if badIids:
            logOperation( 'terminateBad', badIids, '<recruitInstances>' )
            ncs.terminateInstances( args.authToken, badIids )
        #if goodInstances:
        #    recycleInstances( goodInstances )
    return goodInstances
Ejemplo n.º 5
0
def recruitInstance( launchedJsonFilePath, resultsLogFilePathIgnored ):
    logger.info( 'recruiting 1 instance' )
    nWorkersWanted = 1
    # prepare sshClientKey for launch
    if args.sshClientKeyName:
        sshClientKeyName = args.sshClientKeyName
    else:
        keyContents = loadSshPubKey().strip()
        randomPart = str( uuid.uuid4() )[0:13]
        #keyContents += ' #' + randomPart
        sshClientKeyName = 'bfr_%s' % (randomPart)
        respCode = ncs.uploadSshClientKey( args.authToken, sshClientKeyName, keyContents )
        if respCode < 200 or respCode >= 300:
            logger.warning( 'ncs.uploadSshClientKey returned %s', respCode )
            raise Exception( 'could not upload SSH client key')
    #launch
    logOperation( 'launchInstances', 1, '<recruitInstances>' )
    rc = launchInstances( args.authToken, 1,
        sshClientKeyName, launchedJsonFilePath, filtersJson=args.filter,
        encryptFiles = args.encryptFiles
        )
    if rc:
        logger.debug( 'launchInstances returned %d', rc )
    # delete sshClientKey only if we just uploaded it
    if sshClientKeyName != args.sshClientKeyName:
        logger.info( 'deleting sshClientKey %s', sshClientKeyName)
        ncs.deleteSshClientKey( args.authToken, sshClientKeyName )
    if rc:
        return None
    launchedInstances = []
    # get instances from the launched json file
    with open( launchedJsonFilePath, 'r') as jsonInFile:
        try:
            launchedInstances = json.load(jsonInFile)  # an array
        except Exception as exc:
            logger.warning( 'could not load json (%s) %s', type(exc), exc )
    if len( launchedInstances ) < nWorkersWanted:
        logger.warning( 'could not launch as many instances as wanted (%d vs %d)',
            len( launchedInstances ), nWorkersWanted )
    nonstartedIids = [inst['instanceId'] for inst in launchedInstances if inst['state'] != 'started' ]
    if nonstartedIids:
        logger.warning( 'terminating non-started instances %s', nonstartedIids )
        ncs.terminateInstances( args.authToken, nonstartedIids )
    # proceed with instances that were actually started
    startedInstances = [inst for inst in launchedInstances if inst['state'] == 'started' ]
    if len(startedInstances) != 1:
        logger.warning( 'launched %d instances', len(startedInstances) )
        return None

    inst = startedInstances[0]
    iid = inst['instanceId']
    abbrevIid = iid[0:16]
    def trackStderr( proc ):
        for line in proc.stderr:
            print( '<stderr>', abbrevIid, line.strip(), file=sys.stderr )
            logInstallerEvent( 'stderr', line.strip(), iid )

    if sigtermSignaled():
        logger.warning( 'terminating instance because sigtermSignaled %s', iid )
        logOperation( 'terminateFinal', [iid], '<master>' )
        ncs.terminateInstances( args.authToken, [iid] )
        return None
    else:
        # add instance to knownHosts
        with open( os.path.expanduser('~/.ssh/known_hosts'), 'a' ) as khFile:
            jsonToKnownHosts.jsonToKnownHosts( startedInstances, khFile )
        # install blender on startedInstance
        installerCmd = 'sudo apt-get -qq update && sudo apt-get -qq -y install blender > /dev/null'
        logger.info( 'installerCmd %s', installerCmd )
        sshSpecs = inst['ssh']
        deadline = min( g_deadline, time.time() + args.instTimeLimit )
        logInstallerOperation( iid, ['connect', sshSpecs['host'], sshSpecs['port']] )
        with subprocess.Popen(['ssh',
                        '-p', str(sshSpecs['port']),
                        '-o', 'ServerAliveInterval=360',
                        '-o', 'ServerAliveCountMax=3',
                        sshSpecs['user'] + '@' + sshSpecs['host'], installerCmd],
                        encoding='utf8',
                        #stdout=subprocess.PIPE,  # subprocess.PIPE subprocess.DEVNULL
                        stderr=subprocess.PIPE) as proc:
            logInstallerOperation( iid, ['command', installerCmd] )
            stderrThr = threading.Thread(target=trackStderr, args=(proc,))
            stderrThr.start()
            while time.time() < deadline:
                proc.poll() # sets proc.returncode
                if proc.returncode == None:
                    logger.info( 'waiting for install')
                else:
                    if proc.returncode == 0:
                        logger.info( 'installer succeeded on instance %s', abbrevIid )
                    else:
                        logger.warning( 'instance %s gave returnCode %d', abbrevIid, proc.returncode )
                    break
                if sigtermSignaled():
                    break
                time.sleep(30)
            proc.poll()
            returnCode = proc.returncode if proc.returncode != None else 124 # declare timeout if no rc
            if returnCode:
                logger.warning( 'installer returnCode %s', returnCode )
            if returnCode == 124:
                logInstallerEvent( 'timeout', args.instTimeLimit, iid )
            else:
                logInstallerEvent('returncode', returnCode, iid )
            proc.terminate()
            try:
                proc.wait(timeout=5)
                if proc.returncode:
                    logger.warning( 'ssh return code %d', proc.returncode )
            except subprocess.TimeoutExpired:
                logger.warning( 'ssh did not terminate in time' )
            stderrThr.join()
            if returnCode:
                logger.warning( 'terminating instance because installerFailed %s', iid )
                ncs.terminateInstances( args.authToken, [iid] )
                logOperation( 'terminateBad', [iid], '<recruitInstances>' )
                return None
            else:
                return inst
    return None
Ejemplo n.º 6
0
            parResultList = list( parIter )
        logger.info( 'finished initial thread pool')
        # wait until it is time to exit
        while len(g_framesFinished) < g_nFramesWanted and sigtermNotSignaled() and time.time()< g_deadline:
            logger.info( 'waiting for frames to finish')
            time.sleep( 10 )


    if args.launch:
        if not goodInstances:
            logger.info( 'no good instances to terminate')
        else:
            logger.info( 'terminating %d instances', len( goodInstances) )
            iids = [inst['instanceId'] for inst in goodInstances]
            logOperation( 'terminateFinal', iids, '<master>' )
            ncs.terminateInstances( args.authToken, iids )
    else:
        with open( dataDirPath + '/survivingInstances.json','w' ) as outFile:
            json.dump( list(goodInstances), outFile )

    nFramesFinished = len(g_framesFinished)
    if nFramesFinished:
        encodeTo264( dataDirPath, settingsToSave['outVideoFileName'], 
            args.frameRate, startFrame=args.startFrame,
            frameFileType=extensions[args.frameFileType] )

    elapsed = time.time() - startTime
    logger.info( 'rendered %d frames using %d "good" instances', len(g_framesFinished), len(goodInstances) )
    logger.info( 'finished; elapsed time %.1f seconds (%.1f minutes)', elapsed, elapsed/60 )
    logOperation( 'finished',
        {'nInstancesRecruited': len(goodInstances),
Ejemplo n.º 7
0
def renderFrame( frameNum ):
    frameDirPath = os.path.join( dataDirPath, 'frame_%06d' % frameNum )
    frameFileName = frameFilePattern.replace( '######', '%06d' % frameNum )
    installerFilePath = os.path.join(frameDirPath, 'data', 'runDistributedBlender.py.jlog' )

    logResult( 'frameStart', frameNum, frameNum )
    os.makedirs( frameDirPath+'/data', exist_ok=True )

    instances = allocateInstances( args.nWorkers, frameDirPath+'/data/launched.json')
    try:
        cmd = [
            scriptDirPath()+'/runDistributedBlender.py',
            os.path.realpath( args.blendFilePath ),
            '--launch', False,
            '--authToken', args.authToken,
            '--blocks_user', args.blocks_user,
            '--nWorkers', args.nWorkers,
            '--filter', args.filter,
            '--width', args.width,
            '--height', args.height,
            '--seed', args.seed,
            '--timeLimit', args.timeLimit,
            '--useCompositor', args.useCompositor,
            '--frame', frameNum
        ]
        cmd = [ str( arg ) for arg in cmd ]
        logger.info( 'frame %d, %s', frameNum, frameDirPath )
        logger.info( 'cmd %s', cmd )

        if True:
            try:
                retCode = subprocess.call( cmd, cwd=frameDirPath,
                    stdout=sys.stdout, stderr=sys.stderr
                    )
            except Exception as exc:
                logger.warning( 'runDistributedBlender call threw exception (%s) %s',type(exc), exc )
                logResult( 'exception', str(exc), frameNum )
                return exc
            else:
                logger.info( 'RC from runDistributed: %d', retCode )
        if retCode:
            logResult( 'retCode', retCode, frameNum )
            #recycleInstances( instances )
            return retCode
        frameFilePath = os.path.join( frameDirPath, 'data', frameFileName)
        if not os.path.isfile( frameFilePath ):
            logResult( 'retCode', errno.ENOENT, frameNum )
            return FileNotFoundError( errno.ENOENT, 'could not render frame', frameFileName )
        outFilePath = os.path.join(dataDirPath, frameFileName )
        logger.info( 'moving %s to %s', frameFilePath, dataDirPath )
        try:
            if os.path.isfile( outFilePath ):
                os.remove( outFilePath )
            shutil.move( os.path.join( frameDirPath, 'data', frameFileName), dataDirPath )
        except Exception as exc:
            logger.warning( 'trouble moving %s (%s) %s', frameFileName, type(exc), exc )
            logResult( 'exception', str(exc), frameNum )
            return exc
    finally:
        (byInstance, badSet) = demuxResults( installerFilePath )
        if badSet:
            logger.warning( 'instances not well installed: %s', badSet )
            badIids = []
            goodInstances = []
            # remove bad instances from the list of instances to recycle
            for inst in instances:
                iid = inst['instanceId']
                if iid in badSet:
                    badIids.append( iid )
                else:
                    goodInstances.append( inst )
            # terminate any bad instances
            if badIids:
                logResult( 'renderFrame would terminate bad instances', badIids, frameNum )
                ncs.terminateInstances( args.authToken, badIids )
                instances = goodInstances
        # recycle the (hopefully non-bad) instances
        recycleInstances( instances )

    logResult( 'frameEnd', frameNum, frameNum )
    return 0