# Module-level imports inferred from usage below. Initialize, Results,
# LineResults, masterTask and workerTask are assumed to be provided by the
# surrounding geobipy inversion module.
from importlib import import_module
from os import makedirs
from os.path import join

import numpy as np
import h5py

from geobipy.src.base import customFunctions, fileIO


def multipleCore(inputFile, outputDir, skipHDF5):

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI

    world = MPI.COMM_WORLD

    myMPI.rankPrint(world, 'Running EMinv1D_MCMC')

    UP = import_module(inputFile, package=None)

    AllData = eval(UP.dataInit)

    # Initialize the data object on the master rank only, then broadcast.
    if (world.rank == 0):
        AllData.read(UP.dataFname, UP.sysFname)

    myData = AllData.Bcast(world)
    if (world.rank == 0):
        myData = AllData

    myMPI.rankPrint(world, 'Data Broadcast')

    assert (world.size <= myData.N + 1), \
        'Do not ask for more cores than you have data points! Cores:nData ' + str([world.size, myData.N])

    # Create a communicator containing only the master rank.
    allGroup = world.Get_group()
    masterGroup = allGroup.Incl([0])
    masterComm = world.Create(masterGroup)

    t0 = MPI.Wtime()
    t1 = t0

    prng = myMPI.getParallelPrng(world, MPI.Wtime)

    # Make sure the line results folders exist.
    try:
        makedirs(outputDir)
    except OSError:
        pass

    # Get a datapoint; it doesn't matter which one.
    DataPoint = myData.getDataPoint(0)

    # Read in the user parameters.
    paras = UP.userParameters(DataPoint)

    # Check the parameters.
    paras.check(DataPoint)

    # Initialize the inversion to obtain the sizes of everything.
    [paras, Mod, D, prior, posterior, PhiD] = Initialize(paras, DataPoint, prng=prng)

    # Create the results template.
    Res = Results(paras.save, paras.plot, paras.savePNG, paras, D, Mod)

    world.barrier()
    myMPI.rankPrint(world, 'Initialized Results')

    # Get the line numbers in the data.
    lines = np.unique(myData.line)
    lines.sort()
    nLines = lines.size

    world.barrier()
    myMPI.rankPrint(world, 'Creating HDF5 files, this may take a few minutes...')

    ### Only do this using the subcommunicator!
    if (masterComm != MPI.COMM_NULL):
        for i in range(nLines):
            j = np.where(myData.line == lines[i])[0]
            fName = join(outputDir, str(lines[i]) + '.h5')
            with h5py.File(fName, 'w', driver='mpio', comm=masterComm) as f:
                LR = LineResults()
                LR.createHdf(f, myData.id[j], Res)
            myMPI.rankPrint(world, 'Time to create the line with {} data points: {:.3f} s'.format(j.size, MPI.Wtime() - t0))
            t0 = MPI.Wtime()

    world.barrier()

    # Open the files collectively.
    LR = [None] * nLines
    for i in range(nLines):
        fName = join(outputDir, str(lines[i]) + '.h5')
        LR[i] = LineResults(fName, hdfFile=h5py.File(fName, 'a', driver='mpio', comm=world))
        # myMPI.print("rank {} line {} iDs {}".format(world.rank, i, LR[i].iDs))

    world.barrier()
    myMPI.rankPrint(world, 'Files Created in {:.3f} s'.format(MPI.Wtime() - t1))
    t0 = MPI.Wtime()

    # Carry out the master-worker tasks.
    if (world.rank == 0):
        masterTask(myData, world)
    else:
        workerTask(myData, UP, prng, world, LR)

    world.barrier()

    # Close all the files.
    for i in range(nLines):
        LR[i].close()
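# ---------------------------------------------------------------------------
# masterTask and workerTask are called above but not defined in this listing.
# The following is a minimal sketch of the dynamic master-worker scheduling
# pattern they appear to implement, not geobipy's actual code: the master
# hands out data point indices on demand and sends a -1 sentinel once the
# work is exhausted. masterTaskSketch, workerTaskSketch, and invertDataPoint
# are hypothetical names.
def masterTaskSketch(nPoints, world):
    from mpi4py import MPI
    status = MPI.Status()
    nextPoint = 0
    # Seed every worker with one data point index.
    for worker in range(1, world.size):
        world.send(nextPoint, dest=worker)
        nextPoint += 1
    # Hand out the remaining indices as workers report back.
    nFinished = 0
    while nFinished < world.size - 1:
        world.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        if nextPoint < nPoints:
            world.send(nextPoint, dest=status.Get_source())
            nextPoint += 1
        else:
            world.send(-1, dest=status.Get_source())  # No more work.
            nFinished += 1


def workerTaskSketch(world, invertDataPoint):
    # Keep requesting indices until the master sends the -1 sentinel.
    index = world.recv(source=0)
    while index != -1:
        invertDataPoint(index)     # Stand-in for the single-point MCMC inversion.
        world.send(index, dest=0)  # Report completion and ask for more work.
        index = world.recv(source=0)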
def multipleCore(inputFile, outputDir, skipHDF5):

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI

    world = MPI.COMM_WORLD

    myMPI.rankPrint(world, 'Running EMinv1D_MCMC')
    myMPI.rankPrint(world, 'Using user input file {}'.format(inputFile))

    rank = world.rank
    nRanks = world.size
    masterRank = rank == 0

    # Start keeping track of time.
    t0 = MPI.Wtime()
    t1 = t0

    UP = import_module(inputFile, package=None)

    # Make data and system filenames lists of str.
    if isinstance(UP.dataFilename, str):
        UP.dataFilename = [UP.dataFilename]
    if isinstance(UP.systemFilename, str):
        UP.systemFilename = [UP.systemFilename]

    # Everyone needs the system classes read in early.
    Dataset = eval(customFunctions.safeEval(UP.dataInit))
    Dataset.readSystemFile(UP.systemFilename)

    # Get the number of points in the file.
    if masterRank:
        nPoints = Dataset._readNpoints(UP.dataFilename)
        assert (nRanks - 1 <= nPoints + 1), \
            'Do not ask for more cores than you have data points! Cores:nData {}:{}'.format(nRanks, nPoints)

    # Create a communicator containing only the master rank.
    allGroup = world.Get_group()
    masterGroup = allGroup.Incl([0])
    masterComm = world.Create(masterGroup)

    # Create a parallel RNG on each worker with a different seed.
    prng = myMPI.getParallelPrng(world, MPI.Wtime)

    myMPI.rankPrint(world, 'Creating HDF5 files, this may take a few minutes...')
    myMPI.rankPrint(world, 'Files are being created for data files {} and system files {}'.format(UP.dataFilename, UP.systemFilename))

    ### Only do this using the Master subcommunicator!
    # Here we initialize the HDF5 files.
    if (masterComm != MPI.COMM_NULL):

        # Make sure the results folders exist.
        try:
            makedirs(outputDir)
        except OSError:
            pass

        # Prepare the dataset so that we can read a point at a time.
        Dataset._initLineByLineRead(UP.dataFilename, UP.systemFilename)

        # Get a datapoint from the file.
        DataPoint = Dataset._readSingleDatapoint()
        Dataset._closeDatafiles()

        # While preparing the file, we need access to the line numbers and
        # fiducials in the data file.
        tmp = fileIO.read_columns(UP.dataFilename[0], Dataset._indicesForFile[0][:2], 1, nPoints)
        Dataset._openDatafiles(UP.dataFilename)

        # Get the line numbers in the data.
        lineNumbers = np.unique(tmp[:, 0])
        lineNumbers.sort()
        nLines = lineNumbers.size
        fiducials = tmp[:, 1]

        # Read in the user parameters.
        paras = UP.userParameters(DataPoint)

        # Check the parameters.
        paras.check(DataPoint)

        # Initialize the inversion to obtain the sizes of everything.
        [paras, Mod, D, prior, posterior, PhiD] = Initialize(paras, DataPoint, prng=prng)

        # Create the results template.
        Res = Results(D, Mod,
                      save=paras.save, plot=paras.plot, savePNG=paras.savePNG,
                      nMarkovChains=paras.nMarkovChains, plotEvery=paras.plotEvery,
                      parameterDisplayLimits=paras.parameterDisplayLimits,
                      reciprocateParameters=paras.reciprocateParameters)

        # For each line, get the fiducials and create an HDF5 file for the
        # line results. A line results file needs an initialized Results
        # class for a single data point.
        for line in lineNumbers:
            fiducialsForLine = np.where(tmp[:, 0] == line)[0]
            nFids = fiducialsForLine.size

            # Create a filename for the current line number.
            fName = join(outputDir, '{}.h5'.format(line))

            # Open an HDF5 file in parallel mode.
            with h5py.File(fName, 'w', driver='mpio', comm=masterComm) as f:
                LR = LineResults()
                LR.createHdf(f, tmp[fiducialsForLine, 1], Res)
            myMPI.rankPrint(world, 'Time to create the line with {} data points: {:.3f} s'.format(nFids, MPI.Wtime() - t0))
            t0 = MPI.Wtime()

        myMPI.print('Initialized Results for writing.')

    # Everyone needs the line numbers in order to open the results files collectively.
    if masterRank:
        DataPointType = DataPoint.hdfName()
    else:
        lineNumbers = None
        DataPointType = None

    lineNumbers = myMPI.Bcast(lineNumbers, world)
    nLines = lineNumbers.size
    DataPointType = world.bcast(DataPointType)

    # Open the files collectively.
    LR = [None] * nLines
    for i, line in enumerate(lineNumbers):
        fName = join(outputDir, '{}.h5'.format(line))
        LR[i] = LineResults(fName, hdfFile=h5py.File(fName, 'a', driver='mpio', comm=world))

    world.barrier()
    myMPI.rankPrint(world, 'Files Created in {:.3f} s'.format(MPI.Wtime() - t1))
    t0 = MPI.Wtime()

    # Carry out the master-worker tasks.
    if (world.rank == 0):
        masterTask(Dataset, world)
    else:
        DataPoint = eval(customFunctions.safeEval(DataPointType))
        workerTask(DataPoint, UP, prng, world, lineNumbers, LR)

    world.barrier()

    # Close all the files. Must be collective.
    for i in range(nLines):
        LR[i].close()

    if masterRank:
        Dataset._closeDatafiles()
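# ---------------------------------------------------------------------------
# The 'Must be collective' comment above is the key constraint of parallel
# HDF5: with driver='mpio', every rank in the communicator must participate
# in each file open, dataset creation, and close. A minimal standalone
# sketch of the rule (hypothetical function and file name; requires h5py
# built against parallel HDF5):
def collectiveWriteSketch(fName='example.h5'):
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # Every rank opens the file together...
    with h5py.File(fName, 'w', driver='mpio', comm=comm) as f:
        # ...and every rank creates the dataset together,
        values = f.create_dataset('values', (comm.size,), dtype='f8')
        # but each rank may then write its own disjoint slice independently.
        values[comm.rank] = float(comm.rank)
    # Leaving the 'with' block closes the file collectively on all ranks.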
# Imports inferred from usage in this test fragment.
from mpi4py import MPI
import numpy as np
from geobipy.src.base import MPI as myMPI

world = MPI.COMM_WORLD
rank = world.rank
master = rank == 0
size = world.size

myMPI.helloWorld(world)

x = 1

# Set up array sizes for consistency and chunk lengths per core.
N = x * size + 1

starts, chunks = myMPI.loadBalance_shrinkingArrays(N, size)
myMPI.rankPrint(world, "start indices: {}".format(starts))
myMPI.rankPrint(world, "chunk sizes: {}".format(chunks))

### Test base geobipy.MPI routines

# data type
# dt = None
# if master:
#     x = np.full(rank+1, rank)
#     myMPI._isendDtype(x, dest=1, world=world)
# elif rank == 1:
#     dt = myMPI._irecvDtype(source=0, world=world)
# myMPI.orderedPrint(world, dt, 'datatype send and recv')

# if not master:
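# ---------------------------------------------------------------------------
# A pure-numpy sketch of what loadBalance_shrinkingArrays appears to compute
# (an assumption based on its use above, not geobipy's implementation):
# N items are split across `size` ranks, with the remainder spread one item
# at a time over the leading ranks, so chunk lengths shrink by at most one.
def loadBalanceSketch(N, size):
    chunks = np.full(size, N // size, dtype=np.int64)
    chunks[:N % size] += 1                     # Spread the remainder.
    starts = np.r_[0, np.cumsum(chunks)[:-1]]  # Start index for each rank.
    return starts, chunks

# For example, loadBalanceSketch(5, 4) gives
#   starts = [0, 2, 3, 4] and chunks = [2, 1, 1, 1],
# matching N = x * size + 1 above: exactly one rank carries one extra item.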