def Matching(cf):
    """Match image ``cf.study.I0`` to ``cf.study.I1`` and save the result.

    Selects the CUDA device when ``cf.compute.useCUDA`` is set and a
    ``cf.compute.gpuID`` is given, loads both images, bundles the
    ``cf.vectormomentum``, ``cf.optim`` and ``cf.io`` settings into a
    ``MatchingVariables`` object and hands it to ``RunMatching``.

    When ``cf.io.outputPrefix`` is not None, the parsed configuration is
    written to ``<prefix>parsedconfig.yaml`` before the optimization and
    ``<prefix>m0.mhd``, ``<prefix>phiinv.mhd`` and ``<prefix>phi.mhd`` are
    written after it.  When it is None this function writes nothing.

    Args:
        cf: configuration object; it is serialized against
            ``MatchingConfigSpec``.
    """
    if cf.compute.useCUDA and cf.compute.gpuID is not None:
        ca.SetCUDADevice(cf.compute.gpuID)

    if cf.io.outputPrefix is not None:
        # Prepare the output directory.  This used to run unconditionally,
        # so os.path.dirname(None) raised before the None checks below were
        # ever reached.
        common.Mkdir_p(os.path.dirname(cf.io.outputPrefix))
        # Output loaded config
        cfstr = Config.ConfigToYAML(MatchingConfigSpec, cf)
        with open(cf.io.outputPrefix + "parsedconfig.yaml", "w") as f:
            f.write(cfstr)

    # mem type is determined by whether or not we're using CUDA
    mType = ca.MEM_DEVICE if cf.compute.useCUDA else ca.MEM_HOST

    # NOTE: intensities are used as loaded; the rescaling by 255
    # (ca.DivC_I) that used to follow the loads is disabled.
    I0 = common.LoadITKImage(cf.study.I0, mType)
    I1 = common.LoadITKImage(cf.study.I1, mType)

    grid = I0.grid()
    ca.ThreadMemoryManager.init(grid, mType, 1)

    # TODO: need to work on these
    # nTimeSteps + 1 equally spaced time points covering [0, 1]
    t = [x * 1. / cf.optim.nTimeSteps
         for x in range(cf.optim.nTimeSteps + 1)]
    # every time index except t[0] gets a pair of checkpoint fields
    checkpointinds = range(1, len(t))
    checkpointstates = [(ca.Field3D(grid, mType), ca.Field3D(grid, mType))
                        for idx in checkpointinds]

    p = MatchingVariables(I0, I1,
                          cf.vectormomentum.sigma,
                          t, checkpointinds, checkpointstates,
                          cf.vectormomentum.diffOpParams[0],
                          cf.vectormomentum.diffOpParams[1],
                          cf.vectormomentum.diffOpParams[2],
                          cf.optim.Niter,
                          cf.optim.stepSize,
                          cf.optim.maxPert,
                          cf.optim.nTimeSteps,
                          integMethod=cf.optim.integMethod,
                          optMethod=cf.optim.method,
                          nInv=cf.optim.NIterForInverse,
                          plotEvery=cf.io.plotEvery,
                          plotSlice=cf.io.plotSlice,
                          quiverEvery=cf.io.quiverEvery,
                          outputPrefix=cf.io.outputPrefix)

    RunMatching(p)

    # write output
    if cf.io.outputPrefix is not None:
        # reset all variables by shooting once, may have been overwritten
        CAvmCommon.IntegrateGeodesic(p.m0, p.t, p.diffOp,
                                     p.m, p.g, p.ginv,
                                     p.scratchV1, p.scratchV2, p.scratchV3,
                                     p.checkpointstates, p.checkpointinds,
                                     Ninv=p.nInv,
                                     integMethod=p.integMethod)
        common.SaveITKField(p.m0, cf.io.outputPrefix + "m0.mhd")
        common.SaveITKField(p.ginv, cf.io.outputPrefix + "phiinv.mhd")
        common.SaveITKField(p.g, cf.io.outputPrefix + "phi.mhd")
def BuildAtlas(cf):
    """Atlas-construction worker for the subjects assigned to this process.

    The subjects listed in ``cf.study`` are dealt out round-robin by MPI rank
    (``[rank::cf.compute.numProcesses]``).  In every iteration each local
    subject goes through one ``RunWarpIteration``; the per-subject
    ``p.sumSplatI`` / ``p.sumJac`` images and the two energies are summed
    locally and, when ``cf.compute.useMPI`` is set, across processes.  The
    atlas image is then updated as ``sumSplatI / sumJac``.

    Rank 0 is the reporter: it writes ``<prefix>parsedconfig.yaml`` (when
    ``cf.io.outputPrefix`` is not None), produces the plots and prints the
    energies.  ``AtlasWriteOutput`` is called by every process at the end.

    Args:
        cf: configuration object; it is serialized against
            ``AtlasConfigSpec``.  ``cf.study.numSubjects`` is overwritten
            with ``len(cf.study.subjectImages)``.

    Raises:
        Exception: if MPI is in use and there are fewer subjects than
            processes.
    """
    localRank = Compute.GetMPIInfo()['local_rank']
    rank = Compute.GetMPIInfo()['rank']

    # make sure the directory part of the output prefix exists
    common.Mkdir_p(os.path.dirname(cf.io.outputPrefix))

    # only rank 0 reports (config dump, plots, energy printout)
    isReporter = (rank == 0)

    cf.study.numSubjects = len(cf.study.subjectImages)

    # dump the configuration as it was parsed
    if isReporter and cf.io.outputPrefix is not None:
        cfstr = Config.ConfigToYAML(AtlasConfigSpec, cf)
        with open(cf.io.outputPrefix + "parsedconfig.yaml", "w") as f:
            f.write(cfstr)

    # More subjects than processes is fine; more processes than subjects
    # is not.
    if cf.compute.useMPI and (cf.study.numSubjects < cf.compute.numProcesses):
        raise Exception("Please don't use more processes " +
                        "than total number of individuals")

    # round-robin split: this process takes every numProcesses-th subject
    stride = cf.compute.numProcesses
    nodeSubjectIds = cf.study.subjectIds[rank::stride]
    nodeImages = cf.study.subjectImages[rank::stride]
    nodeWeights = cf.study.subjectWeights[rank::stride]
    numLocalSubjects = len(nodeImages)

    print(' '.join(str(piece) for piece in (
        'rank:', rank, ', localRank:', localRank,
        ', nodeImages:', nodeImages, ', nodeWeights:', nodeWeights)))

    # device memory with CUDA, host memory otherwise
    if cf.compute.useCUDA:
        mType = ca.MEM_DEVICE
    else:
        mType = ca.MEM_HOST

    # string entries are loaded from disk, anything else is used as given
    J_array = []
    for entry in nodeImages:
        if isinstance(entry, str):
            J_array.append(common.LoadITKImage(entry, mType))
        else:
            J_array.append(entry)

    # the first local image defines the grid
    imGrid = J_array[0].grid()

    # atlas image
    atlas = ca.Image3D(imGrid, mType)

    # only the initial momentum is kept per local subject
    m_array = [ca.Field3D(imGrid, mType) for _ in range(numLocalSubjects)]

    # one set of warp variables, reused for every local subject in the loop
    p = WarpVariables(imGrid, mType,
                      cf.vectormomentum.diffOpParams[0],
                      cf.vectormomentum.diffOpParams[1],
                      cf.vectormomentum.diffOpParams[2],
                      cf.optim.NIterForInverse,
                      cf.vectormomentum.sigma,
                      cf.optim.stepSize,
                      integMethod=cf.optim.integMethod)

    # local accumulators for the atlas numerator and denominator; they are
    # summed across MPI processes later
    sumSplatI = ca.Image3D(imGrid, mType)
    sumJac = ca.Image3D(imGrid, mType)

    # start up the memory manager for scratch variables
    ca.ThreadMemoryManager.init(imGrid, mType, 0)

    # host-side buffer for MPI reductions, allocated only when the data is
    # not already in host memory
    if cf.compute.useMPI:
        if mType == ca.MEM_HOST:
            mpiImageBuff = None
        else:
            mpiImageBuff = ca.Image3D(imGrid, ca.MEM_HOST)

    nSteps = cf.optim.nTimeSteps
    t = [k * 1. / nSteps for k in range(nSteps + 1)]
    cpinds = range(1, len(t))
    # t=0 is not in cpinds (identity deformation, not checkpointed), so the
    # last time point sits at position len(t) - 2 of cpinds
    msmtinds = [len(t) - 2]
    cpstates = [(ca.Field3D(imGrid, mType), ca.Field3D(imGrid, mType))
                for _ in cpinds]
    gradAtMsmts = [ca.Image3D(imGrid, mType) for _ in msmtinds]

    EnergyHistory = []

    # TODO: better initializations
    # atlas image and all initial momenta start at zero
    ca.SetMem(atlas, 0.0)
    for subjectMomentum in m_array:
        ca.SetMem(subjectMomentum, 0.0)

    # Disabled alternative (kept for reference): initialize the template as
    # the mean of the input images instead of zeros.
    #   ca.SetMem(groupState.I0, 0.0)
    #   tmp = ca.ManagedImage3D(imGrid, mType)
    #   for tdisc in tdiscGroup:
    #       if tdisc.J is not None:
    #           ca.Copy(tmp, tdisc.J)
    #           groupState.I0 += tmp
    #   del tmp
    #   if cf.compute.useMPI:
    #       Compute.Reduce(groupState.I0, mpiImageBuff)
    #   # divide by total num subjects
    #   groupState.I0 /= cf.study.numSubjects

    # preprocessinput

    # p.I0 refers to the atlas for the whole run; this reference never changes
    p.I0 = atlas

    for iterIdx in range(cf.optim.Niter):
        # fresh accumulators for this iteration
        ca.SetMem(sumSplatI, 0.0)
        ca.SetMem(sumJac, 0.0)
        # one-element arrays so they can be reduced in place through MPI
        TotalVEnergy = np.zeros(1)
        TotalIEnergy = np.zeros(1)

        for subjIdx, (subjectMomentum, subjectImage) in enumerate(
                zip(m_array, J_array)):
            # reference assignments only, nothing is copied
            p.m0 = subjectMomentum
            Imsmts = [subjectImage]

            # one warp iteration for this subject
            VEnergy, IEnergy = RunWarpIteration(nodeSubjectIds[subjIdx], cf,
                                                p, t, Imsmts, cpinds,
                                                cpstates, msmtinds,
                                                gradAtMsmts, iterIdx)

            # accumulate this subject's contribution
            ca.Add_I(sumSplatI, p.sumSplatI)
            ca.Add_I(sumJac, p.sumJac)
            TotalVEnergy[0] += VEnergy
            TotalIEnergy[0] += IEnergy

        # with several processes the partial sums have to be combined
        if cf.compute.useMPI:
            Compute.Reduce(sumSplatI, mpiImageBuff)
            Compute.Reduce(sumJac, mpiImageBuff)
            # energies are summed over all processes as well
            mpi4py.MPI.COMM_WORLD.Allreduce(mpi4py.MPI.IN_PLACE,
                                            TotalVEnergy,
                                            op=mpi4py.MPI.SUM)
            mpi4py.MPI.COMM_WORLD.Allreduce(mpi4py.MPI.IN_PLACE,
                                            TotalIEnergy,
                                            op=mpi4py.MPI.SUM)

        EnergyHistory.append([TotalVEnergy[0], TotalIEnergy[0]])

        # new atlas image = accumulated numerator / accumulated denominator
        ca.Div(atlas, sumSplatI, sumJac)

        if isReporter:
            # plot every plotEvery-th iteration and on the last one
            if cf.io.plotEvery > 0 and (
                    (iterIdx + 1) % cf.io.plotEvery == 0 or
                    iterIdx == cf.optim.Niter - 1):
                AtlasPlots(cf, p, atlas, m_array, EnergyHistory)

            # print out energy
            (VEnergy, IEnergy) = EnergyHistory[-1]
            print(' '.join(str(piece) for piece in (
                "Iter", iterIdx, "of", cf.optim.Niter, ":",
                VEnergy + IEnergy, '(Total) = ',
                VEnergy, '(Vector) + ',
                IEnergy, '(Image)')))

    # write output images and fields
    AtlasWriteOutput(cf, atlas, m_array, nodeSubjectIds, isReporter)
def MatchingImageMomenta(cf):
    """Runs matching for image momenta pair.

    Loads the image/momenta pair ``(cf.study.I, cf.study.m)`` and the pair
    ``(cf.study.J, cf.study.n)``, then runs ``cf.optim.Niter`` iterations
    of: integrate the geodesic forward from ``geodesicState.p0``, integrate
    the adjoints backward, take a gradient step, and compute and print the
    energy.  ``geodesicState.p0`` starts at zero.

    The parsed configuration is written to ``<prefix>parsedconfig.yaml``
    when ``cf.io.outputPrefix`` is not None.  Plots are produced every
    ``cf.io.plotEvery`` iterations and on the last iteration, and
    ``MatchingImageMomentaWriteOuput`` is called once at the end.

    Args:
        cf: configuration object; it is serialized against
            ``MatchingImageMomentaConfigSpec``.
    """
    if cf.compute.useCUDA and cf.compute.gpuID is not None:
        ca.SetCUDADevice(cf.compute.gpuID)

    # A live ``common.DebugHere()`` call used to sit here.  It is disabled
    # to match the other entry points, which all keep it commented out.
    # NOTE(review): presumably a debugger hook - confirm nothing relies on it.
    #common.DebugHere()

    # prepare output directory
    common.Mkdir_p(os.path.dirname(cf.io.outputPrefix))

    # Output loaded config
    if cf.io.outputPrefix is not None:
        cfstr = Config.ConfigToYAML(MatchingImageMomentaConfigSpec, cf)
        with open(cf.io.outputPrefix + "parsedconfig.yaml", "w") as f:
            f.write(cfstr)

    # mem type is determined by whether or not we're using CUDA
    mType = ca.MEM_DEVICE if cf.compute.useCUDA else ca.MEM_HOST

    # load data in memory
    I0 = common.LoadITKImage(cf.study.I, mType)
    m0 = common.LoadITKField(cf.study.m, mType)
    J1 = common.LoadITKImage(cf.study.J, mType)
    n1 = common.LoadITKField(cf.study.n, mType)

    # get imGrid from data
    imGrid = I0.grid()

    # create time array with checkpointing info for this geodesic to be
    # estimated (the second returned value is not needed here)
    (s, _unusedInd, rCpinds) = CAvmHGM.HGMSetUpTimeArray(
        cf.optim.nTimeSteps, [1.0], 0.001)
    tDiscGeodesic = CAvmHGMCommon.HGMSetupTimeDiscretizationResidual(
        s, rCpinds, imGrid, mType)

    # create the state variable for geodesic that is going to hold all info
    p0 = ca.Field3D(imGrid, mType)
    geodesicState = CAvmHGMCommon.HGMResidualState(
        I0, p0, imGrid, mType,
        cf.vectormomentum.diffOpParams[0],
        cf.vectormomentum.diffOpParams[1],
        cf.vectormomentum.diffOpParams[2],
        s,
        cf.optim.NIterForInverse,
        1.0,
        cf.vectormomentum.sigmaM,
        cf.vectormomentum.sigmaI,
        cf.optim.stepSize,
        integMethod=cf.optim.integMethod)

    # initialize with zero
    ca.SetMem(geodesicState.p0, 0.0)

    # start up the memory manager for scratch variables
    ca.ThreadMemoryManager.init(imGrid, mType, 0)

    EnergyHistory = []

    # run the loop
    for it in range(cf.optim.Niter):
        # shoot the geodesic forward
        CAvmHGMCommon.HGMIntegrateGeodesic(geodesicState.p0,
                                           geodesicState.s,
                                           geodesicState.diffOp,
                                           geodesicState.p,
                                           geodesicState.rho,
                                           geodesicState.rhoinv,
                                           tDiscGeodesic,
                                           geodesicState.Ninv,
                                           geodesicState.integMethod)

        # integrate the geodesic backward
        CAvmHGMCommon.HGMIntegrateAdjointsResidual(geodesicState,
                                                   tDiscGeodesic,
                                                   m0, J1, n1)

        # TODO: verify it should just be log map/simple image matching when
        # sigmaM=\infty

        # gradient descent step for geodesic.p0
        CAvmHGMCommon.HGMTakeGradientStepResidual(geodesicState)

        # compute and print energy
        (VEnergy, IEnergy, MEnergy) = MatchingImageMomentaComputeEnergy(
            geodesicState, m0, J1, n1)
        EnergyHistory.append(
            [VEnergy + IEnergy + MEnergy, VEnergy, IEnergy, MEnergy])
        # joined with single spaces: same text as the Python 2 print
        # statement, and it also parses under Python 3
        print(' '.join(str(piece) for piece in (
            "Iter", it, "of", cf.optim.Niter, ":",
            VEnergy + IEnergy + MEnergy, '(Total) = ',
            VEnergy, '(Vector) + ',
            IEnergy, '(Image Match) + ',
            MEnergy, '(Momenta Match)')))

        # plots
        if cf.io.plotEvery > 0 and (((it + 1) % cf.io.plotEvery == 0) or
                                    (it == cf.optim.Niter - 1)):
            MatchingImageMomentaPlots(cf, geodesicState, tDiscGeodesic,
                                      EnergyHistory, m0, J1, n1,
                                      writeOutput=True)

    # write output
    MatchingImageMomentaWriteOuput(cf, geodesicState)
def GeodesicShooting(cf):
    """Shoot image ``cf.study.I0`` along initial momenta ``cf.study.m0``.

    The momenta are multiplied by ``cf.study.scaleMomenta`` and integrated
    with ``CAvmCommon.IntegrateGeodesic``.  A scale of zero skips the
    integration: the image and momenta are copied unchanged and both
    deformations are set to identity.

    When ``cf.io.outputPrefix`` is not None the parsed configuration is
    written to ``<prefix>parsedconfig.yaml`` and the results to
    ``<prefix>I1.mhd``, ``<prefix>m1.mhd``, ``<prefix>phiinv.mhd`` and
    ``<prefix>phi.mhd``; plots are produced, and frames are saved when
    ``cf.io.saveFrames`` is set.  When it is None nothing is written.

    Args:
        cf: configuration object; it is serialized against
            ``GeodesicShootingConfigSpec``.  ``cf.study.scaleMomenta`` is
            replaced by its ``float`` value.
    """
    if cf.io.outputPrefix is not None:
        # Prepare the output directory.  This used to run unconditionally,
        # so os.path.dirname(None) raised before the None checks below were
        # ever reached.
        common.Mkdir_p(os.path.dirname(cf.io.outputPrefix))
        # Output loaded config
        cfstr = Config.ConfigToYAML(GeodesicShootingConfigSpec, cf)
        with open(cf.io.outputPrefix + "parsedconfig.yaml", "w") as f:
            f.write(cfstr)

    mType = ca.MEM_DEVICE if cf.useCUDA else ca.MEM_HOST

    I0 = common.LoadITKImage(cf.study.I0, mType)
    m0 = common.LoadITKField(cf.study.m0, mType)
    grid = I0.grid()

    ca.ThreadMemoryManager.init(grid, mType, 1)

    # set up diffOp
    if mType == ca.MEM_HOST:
        diffOp = ca.FluidKernelFFTCPU()
    else:
        diffOp = ca.FluidKernelFFTGPU()
    diffOp.setAlpha(cf.diffOpParams[0])
    diffOp.setBeta(cf.diffOpParams[1])
    diffOp.setGamma(cf.diffOpParams[2])
    diffOp.setGrid(grid)

    g = ca.Field3D(grid, mType)
    ginv = ca.Field3D(grid, mType)
    mt = ca.Field3D(grid, mType)
    It = ca.Image3D(grid, mType)

    # nTimeSteps + 1 equally spaced time points covering [0, 1]
    t = [x * 1. / cf.integration.nTimeSteps
         for x in range(cf.integration.nTimeSteps + 1)]
    # every time index except t[0] gets a pair of checkpoint fields
    checkpointinds = range(1, len(t))
    checkpointstates = [(ca.Field3D(grid, mType), ca.Field3D(grid, mType))
                        for idx in checkpointinds]

    scratchV1 = ca.Field3D(grid, mType)
    scratchV2 = ca.Field3D(grid, mType)
    scratchV3 = ca.Field3D(grid, mType)

    # scale momenta to shoot; the float value is stored back on the config
    # as before and reused locally instead of being converted again
    cf.study.scaleMomenta = float(cf.study.scaleMomenta)
    scale = cf.study.scaleMomenta
    if abs(scale) > 0.000000:
        ca.MulC_I(m0, scale)
        CAvmCommon.IntegrateGeodesic(m0, t, diffOp,
                                     mt, g, ginv,
                                     scratchV1, scratchV2, scratchV3,
                                     keepstates=checkpointstates,
                                     keepinds=checkpointinds,
                                     Ninv=cf.integration.NIterForInverse,
                                     integMethod=cf.integration.integMethod)
    else:
        # zero scale: nothing to shoot, the result is the input itself
        ca.Copy(It, I0)
        ca.Copy(mt, m0)
        ca.SetToIdentity(ginv)
        ca.SetToIdentity(g)

    # write output
    if cf.io.outputPrefix is not None:
        # scale back shotmomenta before writing
        if abs(scale) > 0.000000:
            ca.ApplyH(It, I0, ginv)
            ca.CoAd(mt, ginv, m0)
            ca.DivC_I(mt, scale)

        common.SaveITKImage(It, cf.io.outputPrefix + "I1.mhd")
        common.SaveITKField(mt, cf.io.outputPrefix + "m1.mhd")
        common.SaveITKField(ginv, cf.io.outputPrefix + "phiinv.mhd")
        common.SaveITKField(g, cf.io.outputPrefix + "phi.mhd")
        GeodesicShootingPlots(g, ginv, I0, It, cf)
        if cf.io.saveFrames:
            SaveFrames(checkpointstates, checkpointinds, I0, It, m0, mt, cf)
def BuildHGM(cf):
    """Hierarchical Geodesic Model (HGM) worker for this process's subjects.

    The subjects listed in ``cf.study`` are dealt out round-robin by MPI rank
    (``[rank::cf.compute.numProcesses]``).  Each iteration calls
    ``HGMGroupVariation`` on the local subset; with ``cf.compute.useMPI``
    the resulting ``sumSplatI``, ``sumJac`` and ``madj`` fields and the
    intercept/slope energies are summed over all processes.  The group
    momenta ``groupState.m0`` then take a fixed-step gradient step and the
    group base image is updated as ``sumSplatI / sumJac``.

    Rank 0 is the reporter: it writes ``<prefix>parsedconfig.yaml`` (when
    ``cf.io.outputPrefix`` is not None), produces the plots and prints the
    energies.  ``HGMWriteOutput`` is called by every process at the end.

    Args:
        cf: configuration object; it is serialized against
            ``HGMConfigSpec``.  ``cf.study.numSubjects`` is overwritten with
            ``len(cf.study.subjectIntercepts)``.

    Raises:
        Exception: if MPI is in use and there are fewer subjects than
            processes, or if ``cf.optim.method`` is not ``'FIXEDGD'``.
    """
    size = Compute.GetMPIInfo()['size']
    rank = Compute.GetMPIInfo()['rank']
    name = Compute.GetMPIInfo()['name']
    localRank = Compute.GetMPIInfo()['local_rank']
    nodename = socket.gethostname()

    # make sure the directory part of the output prefix exists
    common.Mkdir_p(os.path.dirname(cf.io.outputPrefix))

    # only rank 0 reports (config dump, plots, energy printout)
    isReporter = (rank == 0)

    cf.study.numSubjects = len(cf.study.subjectIntercepts)

    # dump the configuration as it was parsed
    if isReporter and cf.io.outputPrefix is not None:
        cfstr = Config.ConfigToYAML(HGMConfigSpec, cf)
        with open(cf.io.outputPrefix + "parsedconfig.yaml", "w") as f:
            f.write(cfstr)

    # More subjects than processes is fine; more processes than subjects
    # is not.
    if cf.compute.useMPI and (cf.study.numSubjects < cf.compute.numProcesses):
        raise Exception("Please don't use more processes " +
                        "than total number of individuals")

    # round-robin split: this process takes every numProcesses-th subject
    stride = cf.compute.numProcesses
    nodeSubjectIds = cf.study.subjectIds[rank::stride]
    nodeIntercepts = cf.study.subjectIntercepts[rank::stride]
    nodeSlopes = cf.study.subjectSlopes[rank::stride]
    nodeBaselineTimes = cf.study.subjectBaselineTimes[rank::stride]

    sys.stdout.write(
        "This is process %d of %d with name: %s on machinename: %s and local rank: %d.\nnodeIntercepts: %s\n nodeSlopes: %s\n nodeBaselineTimes: %s\n"
        % (rank, size, name, nodename, localRank, nodeIntercepts,
           nodeSlopes, nodeBaselineTimes))

    # device memory with CUDA, host memory otherwise
    if cf.compute.useCUDA:
        mType = ca.MEM_DEVICE
    else:
        mType = ca.MEM_HOST

    # string entries are loaded from disk, anything else is used as given
    # intercepts (images)
    intercepts = []
    for entry in nodeIntercepts:
        if isinstance(entry, str):
            intercepts.append(common.LoadITKImage(entry, mType))
        else:
            intercepts.append(entry)
    # slopes (fields)
    slopes = []
    for entry in nodeSlopes:
        if isinstance(entry, str):
            slopes.append(common.LoadITKField(entry, mType))
        else:
            slopes.append(entry)

    # the first local intercept defines the grid
    imGrid = intercepts[0].grid()

    # time array with checkpointing info for the group geodesic
    (tGroup, Jind, gCpinds) = HGMSetUpTimeArray(cf.optim.nTimeStepsGroup,
                                                nodeBaselineTimes,
                                                0.0000001)
    tdiscGroup = CAvmHGMCommon.HGMSetupTimeDiscretizationGroup(
        tGroup, intercepts, slopes, Jind, gCpinds, mType, nodeSubjectIds)

    # time array with checkpointing info for the residual geodesic
    (tResidual, scratchInd, rCpinds) = HGMSetUpTimeArray(
        cf.optim.nTimeStepsResidual, [1.0], 0.0000001)
    tdiscResidual = CAvmHGMCommon.HGMSetupTimeDiscretizationResidual(
        tResidual, rCpinds, imGrid, mType)

    # group state
    groupState = CAvmHGMCommon.HGMGroupState(
        imGrid, mType,
        cf.vectormomentum.diffOpParamsGroup[0],
        cf.vectormomentum.diffOpParamsGroup[1],
        cf.vectormomentum.diffOpParamsGroup[2],
        tGroup,
        cf.optim.NIterForInverse,
        cf.vectormomentum.varIntercept,
        cf.vectormomentum.varSlope,
        cf.vectormomentum.varInterceptReg,
        cf.optim.stepSizeGroup,
        integMethod=cf.optim.integMethodGroup)

    # the residual state is a scratch variable in this algorithm, reused for
    # the residual geodesic of each individual in turn
    residualState = CAvmHGMCommon.HGMResidualState(
        None, None, imGrid, mType,
        cf.vectormomentum.diffOpParamsResidual[0],
        cf.vectormomentum.diffOpParamsResidual[1],
        cf.vectormomentum.diffOpParamsResidual[2],
        tResidual,
        cf.optim.NIterForInverse,
        cf.vectormomentum.varIntercept,
        cf.vectormomentum.varSlope,
        cf.vectormomentum.varInterceptReg,
        cf.optim.stepSizeResidual,
        integMethod=cf.optim.integMethodResidual)

    # start up the memory manager for scratch variables
    ca.ThreadMemoryManager.init(imGrid, mType, 0)

    # host-side buffers for MPI reductions, allocated only when the data is
    # not already in host memory
    if cf.compute.useMPI:
        if mType == ca.MEM_HOST:
            mpiImageBuff = None
        else:
            mpiImageBuff = ca.Image3D(imGrid, ca.MEM_HOST)
        if mType == ca.MEM_HOST:
            mpiFieldBuff = None
        else:
            mpiFieldBuff = ca.Field3D(imGrid, ca.MEM_HOST)

    # latest time index of the group discretization that carries an image
    for timeIdx in reversed(range(len(groupState.t))):
        if tdiscGroup[timeIdx].J is not None:
            indx_last_individual = timeIdx
            break

    # Disabled alternative (kept for reference): initialize the template as
    # the mean of the input images.
    #   ca.SetMem(groupState.I0, 0.0)
    #   tmp = ca.ManagedImage3D(imGrid, mType)
    #   for tdisc in tdiscGroup:
    #       if tdisc.J is not None:
    #           ca.Copy(tmp, tdisc.J)
    #           groupState.I0 += tmp
    #   del tmp
    #   if cf.compute.useMPI:
    #       Compute.Reduce(groupState.I0, mpiImageBuff)
    #   # divide by total num subjects
    #   groupState.I0 /= cf.study.numSubjects

    for iterIdx in range(cf.optim.Niter):
        # HGM variation for the group on the local subjects
        HGMGroupVariation(groupState, tdiscGroup, residualState,
                          tdiscResidual, cf.io.outputPrefix, rank, iterIdx)
        common.CheckCUDAError("Error after HGM iteration")

        # with several processes the partial sums have to be combined
        if cf.compute.useMPI:
            Compute.Reduce(groupState.sumSplatI, mpiImageBuff)
            Compute.Reduce(groupState.sumJac, mpiImageBuff)
            Compute.Reduce(groupState.madj, mpiFieldBuff)

            # energies are summed over all processes as well:
            # entry 1 is the intercept energy, entry 2 the slope energy
            lastEnergies = groupState.EnergyHistory[-1]
            for energyIdx in (1, 2):
                energyBuff = np.array([lastEnergies[energyIdx]])
                mpi4py.MPI.COMM_WORLD.Allreduce(mpi4py.MPI.IN_PLACE,
                                                energyBuff,
                                                op=mpi4py.MPI.SUM)
                lastEnergies[energyIdx] = energyBuff[0]

        # gradient for the momenta (groupState.m is used as scratch)
        ca.Copy(groupState.m, groupState.m0)
        groupState.diffOp.applyInverseOperator(groupState.m)
        ca.Sub_I(groupState.m, groupState.madj)

        # gradient step in the group momenta; only the fixed step size
        # scheme is implemented
        if cf.optim.method != 'FIXEDGD':
            raise Exception("Unknown optimization scheme: " + cf.optim.method)
        ca.Add_MulC_I(groupState.m0, groupState.m, -cf.optim.stepSizeGroup)

        # new group base image = accumulated numerator / denominator
        ca.Div(groupState.I0, groupState.sumSplatI, groupState.sumJac)

        if isReporter:
            # plot every plotEvery-th iteration and on the last one
            if cf.io.plotEvery > 0 and (
                    (iterIdx + 1) % cf.io.plotEvery == 0 or
                    iterIdx == cf.optim.Niter - 1):
                HGMPlots(cf, groupState, tdiscGroup, residualState,
                         tdiscResidual, indx_last_individual,
                         writeOutput=True)

            # print out energy
            (VEnergy, IEnergy, SEnergy) = groupState.EnergyHistory[-1]
            print(' '.join(str(piece) for piece in (
                datetime.datetime.now().time(),
                " Iter", iterIdx, "of", cf.optim.Niter, ":",
                VEnergy + IEnergy + SEnergy, '(Total) = ',
                VEnergy, '(Vector) + ',
                IEnergy, '(Intercept) + ',
                SEnergy, '(Slope)')))

    # write output images and fields
    HGMWriteOutput(cf, groupState, tdiscGroup, isReporter)
def BuildGeoReg(cf):
    """Geodesic-regression worker for the subjects assigned to this process.

    Subject ids, image paths and times are read from
    ``cf.study.subjectFile`` and dealt out round-robin by MPI rank
    (``[rank::cf.compute.numProcesses]``).  Each local subject is handled
    on its own, in sequence: preprocess, ``RunGeoReg``, write output.  One
    ``GeoRegVariables`` object is shared by all local subjects.

    When ``cf.study.initializationsFile`` is given, each subject starts from
    the initial image and momenta listed there; otherwise both start at
    zero.  Rank 0 writes ``<prefix>parsedconfig.yaml`` when
    ``cf.io.outputPrefix`` is not None.

    Args:
        cf: configuration object; it is serialized against
            ``GeoRegConfigSpec``.  ``cf.study.numSubjects`` is overwritten
            with the number of subjects found in the subject file.

    Raises:
        Exception: if MPI is in use and there are fewer subjects than
            processes.
    """
    localRank = Compute.GetMPIInfo()['local_rank']
    rank = Compute.GetMPIInfo()['rank']

    # make sure the directory part of the output prefix exists
    common.Mkdir_p(os.path.dirname(cf.io.outputPrefix))

    # only rank 0 writes the parsed configuration
    isReporter = (rank == 0)

    # filenames and times for all subjects
    (subjectsIds, subjectsImagePaths,
     subjectsTimes) = GeoRegLoadSubjectsDetails(cf.study.subjectFile)
    cf.study.numSubjects = len(subjectsIds)

    # dump the configuration as it was parsed
    if isReporter and cf.io.outputPrefix is not None:
        cfstr = Config.ConfigToYAML(GeoRegConfigSpec, cf)
        with open(cf.io.outputPrefix + "parsedconfig.yaml", "w") as f:
            f.write(cfstr)

    # More subjects than processes is fine; more processes than subjects
    # is not.
    if cf.compute.useMPI and (len(subjectsIds) < cf.compute.numProcesses):
        raise Exception("Please don't use more processes " +
                        "than total number of individuals")

    # round-robin split: this process takes every numProcesses-th subject
    stride = cf.compute.numProcesses
    nodeSubjectsIds = subjectsIds[rank::stride]
    nodeSubjectsImagePaths = subjectsImagePaths[rank::stride]
    nodeSubjectsTimes = subjectsTimes[rank::stride]
    numLocalSubjects = len(nodeSubjectsImagePaths)

    if cf.study.initializationsFile is not None:
        (subjectsInitialImages,
         subjectsInitialMomenta) = GeoRegLoadSubjectsInitializations(
             cf.study.initializationsFile)
        nodeSubjectsInitialImages = subjectsInitialImages[rank::stride]
        nodeSubjectsInitialMomenta = subjectsInitialMomenta[rank::stride]

    print(' '.join(str(piece) for piece in (
        'rank:', rank, ', localRank:', localRank,
        ', numberSubjects/TotalSubjects:', len(nodeSubjectsImagePaths),
        '/', cf.study.numSubjects,
        ', nodeSubjectsImagePaths:', nodeSubjectsImagePaths,
        ', nodeSubjectsTimes:', nodeSubjectsTimes)))

    # device memory with CUDA, host memory otherwise
    if cf.compute.useCUDA:
        mType = ca.MEM_DEVICE
    else:
        mType = ca.MEM_HOST

    # The GPU id is not set here; selecting the device from the local rank
    # (ca.SetCUDADevice(localRank)) is disabled.

    # the first image of the first local subject supplies the grid
    dummyImToGetGridInfo = common.LoadITKImage(nodeSubjectsImagePaths[0][0],
                                               mType)
    imGrid = dummyImToGetGridInfo.grid()
    if cf.study.setUnitSpacing:
        imGrid.setSpacing(ca.Vec3Df(1.0, 1.0, 1.0))
    if cf.study.setZeroOrigin:
        imGrid.setOrigin(ca.Vec3Df(0, 0, 0))

    # start up the memory manager for scratch variables
    ca.ThreadMemoryManager.init(imGrid, mType, 0)

    # regression variables shared by all local subjects
    p = GeoRegVariables(imGrid, mType,
                        cf.vectormomentum.diffOpParams[0],
                        cf.vectormomentum.diffOpParams[1],
                        cf.vectormomentum.diffOpParams[2],
                        cf.optim.NIterForInverse,
                        cf.vectormomentum.sigma,
                        cf.optim.stepSize,
                        integMethod=cf.optim.integMethod)

    # one geodesic regression per local subject
    for subjIdx, subjectImagePaths in enumerate(nodeSubjectsImagePaths):
        # starting point for this subject
        if cf.study.initializationsFile is None:
            ca.SetMem(p.m0, 0.0)
            ca.SetMem(p.I0, 0.0)
        else:
            # the initializations are assumed to be preprocessed already
            # (intensities, origin and voxel scalings)
            p.I0 = common.LoadITKImage(nodeSubjectsInitialImages[subjIdx],
                                       mType)
            p.m0 = common.LoadITKField(nodeSubjectsInitialMomenta[subjIdx],
                                       mType)

        # per-subject memory, allocated in steps a, b and c

        # a. time array with checkpointing info for the regression geodesic,
        #    plus the checkpoint memory
        (tSubject, msmtIdxs,
         cpIdxs) = GeoRegSetUpTimeArray(cf.optim.nTimeSteps,
                                        nodeSubjectsTimes[subjIdx], 0.001)
        cpFields = [(ca.Field3D(imGrid, mType), ca.Field3D(imGrid, mType))
                    for _ in cpIdxs]

        # b. one residual image per measurement index
        residuals = [ca.Image3D(imGrid, mType) for _ in msmtIdxs]

        # c. timepoint images of this subject; string entries are loaded
        #    from disk, anything else is used as given
        timepointImages = []
        for entry in subjectImagePaths:
            if isinstance(entry, str):
                timepointImages.append(common.LoadITKImage(entry, mType))
            else:
                timepointImages.append(entry)

        # undo any change an adaptive step size made for the previous subject
        p.stepSize = cf.optim.stepSize

        subjectId = nodeSubjectsIds[subjIdx]

        # preprocess the images
        GeoRegPreprocessInput(subjectId, cf, p, tSubject, timepointImages,
                              cpIdxs, cpFields, msmtIdxs, residuals)

        # run regression for this subject
        # REMEMBER
        # msmtinds index into cpinds
        # gradAtMsmts is parallel to msmtinds
        # cpinds index into t
        EnergyHistory = RunGeoReg(subjectId, cf, p, tSubject,
                                  timepointImages, cpIdxs, cpFields,
                                  msmtIdxs, residuals)

        # write output images and fields for this subject
        # TODO: BEWARE There are hardcoded numbers inside preprocessing code
        # specific for ADNI/OASIS brain data.
        GeoRegWriteOuput(subjectId, cf, p, tSubject, timepointImages,
                         cpIdxs, cpFields, msmtIdxs, residuals,
                         EnergyHistory)

        # drop the references to this subject's memory
        del tSubject, timepointImages, cpIdxs, cpFields, msmtIdxs, residuals