Example 1
def workerTask(myData, UP, prng, world, LineResults):
    """ Define a wait run ping procedure for each worker """

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    import numpy as np

    # Flag that keeps the worker loop running
    Go = True
    # Get the mpi status
    mpi_status = MPI.Status()

    # Wait until the master sends you an index to process
    i = np.empty(2, dtype=np.int64)
    myRank = np.empty(3, dtype=np.int64)
    world.Recv(i, source=0, tag=MPI.ANY_TAG, status=mpi_status)

    # Check if a killSwitch for this worker was thrown
    if mpi_status.Get_tag() == killSwitch:
        Go = False

    lines = np.unique(myData.line)
    lines.sort()

    while Go:
        t0 = MPI.Wtime()
        # Get the data point for the given index
        DataPoint = myData.getDataPoint(i[0])
        paras = UP.userParameters(DataPoint)

        # Pass through the line results file object if a parallel file system is in use.
        iLine = lines.searchsorted(myData.line[i[0]])
        failed = Inv_MCMC(paras,
                          DataPoint,
                          myData.id[i[0]],
                          prng=prng,
                          LineResults=LineResults[iLine],
                          rank=world.rank)

        # Print a status update
        if (not failed):
            myMPI.print(
                str(world.rank) + ' ' + str(i[0]) + ' ' +
                str(MPI.Wtime() - t0))

        # Send the current rank number to the master
        myRank[:] = (world.rank, i[0], 0)

        if failed:
            # The data point failed; ping the master to request a new index
            world.Send(myRank, dest=0, tag=dpFailed)
        else:
            # The data point was inverted; ping the master to request a new index
            world.Send(myRank, dest=0, tag=dpWin)

        # Wait till you are told what to process next
        world.Recv(i, source=0, tag=MPI.ANY_TAG, status=mpi_status)

        # Check if a killSwitch for this worker was thrown
        if mpi_status.Get_tag() == killSwitch:
            Go = False
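
The worker and master loops in Examples 1, 2, and 7 exchange messages using module-level tag constants (run, dpWin, dpFailed, killSwitch) that are defined elsewhere in the enclosing script and do not appear in these snippets. A minimal sketch of such definitions, with purely illustrative values, might be:

# Hypothetical MPI message tags assumed by the worker/master loops above;
# the values in the original script may differ, they only need to be distinct.
run = 1          # master -> worker: process the index carried by the message
dpWin = 2        # worker -> master: data point inverted successfully
dpFailed = 3     # worker -> master: data point inversion failed
killSwitch = 9   # master -> worker: no more work, shut down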
Example 2
def masterTask(myData, world):
    """ Define a Send Recv Send procedure on the master """

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    import numpy as np

    # Set the total number of data points

    N = myData.N

    # Create and shuffle an integer list of data point indices
    randomizedPointIndices = np.arange(N)
    np.random.shuffle(randomizedPointIndices)

    nFinished = 0
    nSent = 0
    dataSend = np.zeros(1, dtype=np.int64)
    rankRecv = np.zeros(3, dtype=np.float32)

    # Send out the first indices to the workers
    for iWorker in range(1, world.size):
        dataSend[:] = randomizedPointIndices[nSent]
        world.Send(dataSend, dest=iWorker, tag=run)
        nSent += 1

    # Start a timer
    t0 = MPI.Wtime()

    myMPI.print("Initial data points sent. Master is now waiting for requests")

    # Keep sending indices to the workers as they finish until the entire data set is processed
    while nFinished < N:
        # Wait for a worker to ping you

        world.Recv(rankRecv,
                   source=MPI.ANY_SOURCE,
                   tag=MPI.ANY_TAG,
                   status=MPI.Status())
        workerRank = int(rankRecv[0])
        dataPointProcessed = int(rankRecv[1])

        nFinished += 1

        # Send out the next index if any remain
        if (nSent < N):
            dataSend[:] = randomizedPointIndices[nSent]
            world.Send(dataSend, dest=workerRank, tag=run)
            nSent += 1
        else:
            dataSend[0] = -1
            world.Send(dataSend, dest=workerRank, tag=killSwitch)

        elapsed = MPI.Wtime() - t0
        eta = (N / nFinished - 1) * elapsed
        myMPI.print(
            'Inverted data point {} in {:.3f}s  ||  Time: {:.3f}s  ||  QueueLength: {}/{}  ||  ETA: {:.3f}s'
            .format(dataPointProcessed, rankRecv[2], elapsed, N - nFinished, N,
                    eta))
Example 3
def masterTask(Dataset, world):
    """ Define a Send Recv Send procedure on the master """

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    import numpy as np

    # Set the total number of data points
    nPoints = Dataset.nPoints

    nFinished = 0
    nSent = 0
    continueRunning = np.empty(1, dtype=np.int32)
    rankRecv = np.zeros(3, dtype=np.float64)

    # Send out the first data points to the workers
    for iWorker in range(1, world.size):
        # Get a datapoint from the file.
        DataPoint = Dataset._readSingleDatapoint()

        # If DataPoint is None, then we reached the end of the file and no more points can be read in.
        if DataPoint is None:
            # Send the kill switch to the worker to shut down.
            continueRunning[0] = 0  # Do not continue running
            world.Isend(continueRunning, dest=iWorker)
        else:
            continueRunning[0] = 1  # Yes, continue with the next point.
            world.Isend(continueRunning, dest=iWorker)
            DataPoint.Isend(dest=iWorker, world=world)

        nSent += 1

    # Start a timer
    t0 = MPI.Wtime()

    myMPI.print("Initial data points sent. Master is now waiting for requests")

    # Keep sending data points to the workers as they finish until the entire data set is processed
    while nFinished < nPoints:
        # Wait for a worker to request the next data point
        world.Recv(rankRecv,
                   source=MPI.ANY_SOURCE,
                   tag=MPI.ANY_TAG,
                   status=MPI.Status())
        requestingRank = int(rankRecv[0])
        dataPointProcessed = rankRecv[1]

        nFinished += 1

        # Read the next data point from the file
        DataPoint = Dataset._readSingleDatapoint()

        # If DataPoint is None, then we reached the end of the file and no more points can be read in.
        if DataPoint is None:
            # Send the kill switch to the worker to shut down.
            continueRunning[0] = 0  # Do not continue running
            world.Isend(continueRunning, dest=requestingRank)
        else:
            continueRunning[0] = 1  # Yes, continue with the next point.
            world.Isend(continueRunning, dest=requestingRank)
            DataPoint.Isend(dest=requestingRank,
                            world=world,
                            systems=DataPoint.system)

        elapsed = MPI.Wtime() - t0
        eta = (nPoints / nFinished - 1) * elapsed
        myMPI.print(
            'Inverted data point {} in {:.3f}s  ||  Time: {:.3f}s  ||  QueueLength: {}/{}  ||  ETA: {:.3f}s'
            .format(dataPointProcessed, rankRecv[2], elapsed,
                    nPoints - nFinished, nPoints, eta))
Example 4
def multipleCore(inputFile, outputDir, skipHDF5):

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    import numpy as np
    import h5py
    from os import makedirs
    from os.path import join
    from importlib import import_module

    world = MPI.COMM_WORLD
    myMPI.rankPrint(world, 'Running EMinv1D_MCMC')
    myMPI.rankPrint(world, 'Using user input file {}'.format(inputFile))
    rank = world.rank
    nRanks = world.size
    masterRank = rank == 0

    # Start keeping track of time.
    t0 = MPI.Wtime()
    t1 = t0

    UP = import_module(inputFile, package=None)

    # Make data and system filenames lists of str.
    if isinstance(UP.dataFilename, str):
        UP.dataFilename = [UP.dataFilename]
    if isinstance(UP.systemFilename, str):
        UP.systemFilename = [UP.systemFilename]

    # Everyone needs the system classes read in early.
    Dataset = eval(customFunctions.safeEval(UP.dataInit))
    Dataset.readSystemFile(UP.systemFilename)

    # Get the number of points in the file.
    if masterRank:
        nPoints = Dataset._readNpoints(UP.dataFilename)
        assert (nRanks - 1 <= nPoints + 1), Exception(
            'Do not ask for more cores than you have data points! Cores:nData {}:{} '
            .format(nRanks, nPoints))

    # Create a communicator containing only the master rank.
    allGroup = world.Get_group()
    masterGroup = allGroup.Incl([0])
    masterComm = world.Create(masterGroup)

    # Create a parallel RNG on each worker with a different seed.
    prng = myMPI.getParallelPrng(world, MPI.Wtime)

    myMPI.rankPrint(world,
                    'Creating HDF5 files, this may take a few minutes...')
    myMPI.rankPrint(
        world,
        'Files are being created for data files {} and system files {}'.format(
            UP.dataFilename, UP.systemFilename))
    ### Only do this using the Master subcommunicator!
    # Here we initialize the HDF5 files.
    if (masterComm != MPI.COMM_NULL):

        # Make sure the results folders exist
        makedirs(outputDir, exist_ok=True)

        # Prepare the dataset so that we can read a point at a time.
        Dataset._initLineByLineRead(UP.dataFilename, UP.systemFilename)
        # Get a datapoint from the file.
        DataPoint = Dataset._readSingleDatapoint()

        Dataset._closeDatafiles()

        # While preparing the file, we need access to the line numbers and fiducials in the data file
        tmp = fileIO.read_columns(UP.dataFilename[0],
                                  Dataset._indicesForFile[0][:2], 1, nPoints)

        Dataset._openDatafiles(UP.dataFilename)

        # Get the line numbers in the data
        lineNumbers = np.unique(tmp[:, 0])
        lineNumbers.sort()
        nLines = lineNumbers.size
        fiducials = tmp[:, 1]

        # Read in the user parameters
        paras = UP.userParameters(DataPoint)

        # Check the parameters
        paras.check(DataPoint)

        # Initialize the inversion to obtain the sizes of everything
        [paras, Mod, D, prior, posterior, PhiD] = Initialize(paras,
                                                             DataPoint,
                                                             prng=prng)

        # Create the results template
        Res = Results(D,
                      Mod,
                      save=paras.save,
                      plot=paras.plot,
                      savePNG=paras.savePNG,
                      nMarkovChains=paras.nMarkovChains,
                      plotEvery=paras.plotEvery,
                      parameterDisplayLimits=paras.parameterDisplayLimits,
                      reciprocateParameters=paras.reciprocateParameters)

        # For each line, get the fiducials and create an HDF5 file for the line results.
        # A line results file needs an initialized Results class for a single data point.
        for line in lineNumbers:
            fiducialsForLine = np.where(tmp[:, 0] == line)[0]
            nFids = fiducialsForLine.size
            # Create a filename for the current line number
            fName = join(outputDir, '{}.h5'.format(line))
            # Open a HDF5 file in parallel mode.
            with h5py.File(fName, 'w', driver='mpio', comm=masterComm) as f:
                LR = LineResults()
                LR.createHdf(f, tmp[fiducialsForLine, 1], Res)
            myMPI.rankPrint(
                world,
                'Time to create the line with {} data points: {:.3f} s'.format(
                    nFids,
                    MPI.Wtime() - t0))
            t0 = MPI.Wtime()

        myMPI.print('Initialized Results for writing.')

    # Everyone needs the line numbers in order to open the results files collectively.
    if masterRank:
        DataPointType = DataPoint.hdfName()
    else:
        lineNumbers = None
        DataPointType = None
    lineNumbers = myMPI.Bcast(lineNumbers, world)
    nLines = lineNumbers.size

    DataPointType = world.bcast(DataPointType)

    # Open the files collectively
    LR = [None] * nLines
    for i, line in enumerate(lineNumbers):
        fName = join(outputDir, '{}.h5'.format(line))
        LR[i] = LineResults(fName,
                            hdfFile=h5py.File(fName,
                                              'a',
                                              driver='mpio',
                                              comm=world))

    world.barrier()
    myMPI.rankPrint(world,
                    'Files Created in {:.3f} s'.format(MPI.Wtime() - t1))
    t0 = MPI.Wtime()

    # Carry out the master-worker tasks
    if (world.rank == 0):
        masterTask(Dataset, world)
    else:
        DataPoint = eval(customFunctions.safeEval(DataPointType))
        workerTask(DataPoint, UP, prng, world, lineNumbers, LR)

    world.barrier()
    # Close all the files. Must be collective.
    for i in range(nLines):
        LR[i].close()

    if masterRank:
        Dataset._closeDatafiles()
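
For context, a driver like Example 4 is normally launched under mpirun with one master rank and several worker ranks. A hypothetical entry point is sketched below; the module name 'userParameters' and the output directory 'results' are placeholders, not names taken from the examples:

# Hypothetical launcher, run with e.g.: mpirun -np 4 python run_inversion.py
# Assumes the multipleCore function from Example 4 is defined in this script.
if __name__ == '__main__':
    # 'userParameters' is a placeholder module providing dataFilename,
    # systemFilename, dataInit, userParameters, etc. expected by multipleCore.
    multipleCore('userParameters', 'results', skipHDF5=False)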
Example 5
def multipleCore(inputFile, outputDir, skipHDF5):

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    import numpy as np
    import h5py
    from os import makedirs
    from os.path import join
    from importlib import import_module

    world = MPI.COMM_WORLD
    myMPI.rankPrint(world, 'Running EMinv1D_MCMC')

    UP = import_module(inputFile, package=None)

    AllData = eval(UP.dataInit)
    # Initialize the data object on master
    if (world.rank == 0):
        AllData.read(UP.dataFname, UP.sysFname)

    myData = AllData.Bcast(world)
    if (world.rank == 0): myData = AllData

    myMPI.rankPrint(world, 'Data Broadcast')

    assert (
        world.size <= myData.N + 1
    ), 'Do not ask for more cores than you have data points! Cores:nData ' + str(
        [world.size, myData.N])

    allGroup = world.Get_group()
    masterGroup = allGroup.Incl([0])
    masterComm = world.Create(masterGroup)

    t0 = MPI.Wtime()
    t1 = t0

    prng = myMPI.getParallelPrng(world, MPI.Wtime)

    # Make sure the line results folders exist
    makedirs(outputDir, exist_ok=True)

    # Get a data point; it doesn't matter which one
    DataPoint = myData.getDataPoint(0)
    # Read in the user parameters
    paras = UP.userParameters(DataPoint)
    # Check the parameters
    paras.check(DataPoint)
    # Initialize the inversion to obtain the sizes of everything
    [paras, Mod, D, prior, posterior, PhiD] = Initialize(paras,
                                                         DataPoint,
                                                         prng=prng)
    # Create the results template
    Res = Results(paras.save, paras.plot, paras.savePNG, paras, D, Mod)

    world.barrier()
    myMPI.rankPrint(world, 'Initialized Results')

    # Get the line numbers in the data
    lines = np.unique(myData.line)
    lines.sort()
    nLines = lines.size

    world.barrier()

    myMPI.rankPrint(world,
                    'Creating HDF5 files, this may take a few minutes...')
    ### Only do this using the subcommunicator!
    if (masterComm != MPI.COMM_NULL):
        for i in range(nLines):
            j = np.where(myData.line == lines[i])[0]
            fName = join(outputDir, str(lines[i]) + '.h5')
            with h5py.File(fName, 'w', driver='mpio', comm=masterComm) as f:
                LR = LineResults()
                LR.createHdf(f, myData.id[j], Res)
            myMPI.rankPrint(
                world,
                'Time to create the line with {} data points: {:.3f} s'.format(
                    j.size,
                    MPI.Wtime() - t0))
            t0 = MPI.Wtime()

    world.barrier()

    # Open the files collectively
    LR = [None] * nLines
    for i in range(nLines):
        fName = join(outputDir, str(lines[i]) + '.h5')
        LR[i] = LineResults(fName,
                            hdfFile=h5py.File(fName,
                                              'a',
                                              driver='mpio',
                                              comm=world))
        # myMPI.print("rank {} line {} iDs {}".format(world.rank, i, LR[i].iDs))

    world.barrier()
    myMPI.rankPrint(world,
                    'Files Created in {:.3f} s'.format(MPI.Wtime() - t1))
    t0 = MPI.Wtime()

    # Carry out the master-worker tasks
    if (world.rank == 0):
        masterTask(myData, world)
    else:
        workerTask(myData, UP, prng, world, LR)

    world.barrier()
    # Close all the files
    for i in range(nLines):
        LR[i].close()
Example 6
def masterTask(Dataset, world):
  """ Define a Send Recv Send procedure on the master """

  from mpi4py import MPI
  from geobipy.src.base import MPI as myMPI
  from datetime import timedelta

  # Set the total number of data points
  nPoints = Dataset.nPoints

  nFinished = 0
  nSent = 0
#   continueRunning = np.empty(1, dtype=np.int32)
#   rankRecv = np.zeros(3, dtype = np.float64)

  # Send out the first data points to the workers
  for iWorker in range(1, world.size):
    # Get a datapoint from the file.
    DataPoint = Dataset._readSingleDatapoint()

    # If DataPoint is None, then we reached the end of the file and no more points can be read in.
    if DataPoint is None:
        # Send the kill switch to the worker to shut down.
        # continueRunning[0] = 0 # Do not continue running
        continueRunning = False
        world.send(continueRunning, dest=iWorker)
    else:
        # continueRunning[0] = 1 # Yes, continue with the next point.
        continueRunning = True
        world.send(continueRunning, dest=iWorker)
        DataPoint.Isend(dest=iWorker, world=world)

    nSent += 1

  # Start a timer
  t0 = MPI.Wtime()

  myMPI.print("Initial data points sent. Master is now waiting for requests")

  # Keep sending data points to the workers as they finish until the entire data set is processed
  while nFinished < nPoints:
    # Wait for a worker to request the next data point
    status = MPI.Status()
    dummy = world.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
    requestingRank = status.Get_source()
    # requestingRank = np.int(rankRecv[0])
    # dataPointProcessed = rankRecv[1]

    nFinished += 1

    # Read the next data point from the file
    DataPoint = Dataset._readSingleDatapoint()

    # If DataPoint is None, then we reached the end of the file and no more points can be read in.
    if DataPoint is None:
        # Send the kill switch to the worker to shut down.
        # continueRunning[0] = 0 # Do not continue running
        continueRunning = False
        world.send(continueRunning, dest=requestingRank)
    else:
        # continueRunning[0] = 1 # Yes, continue with the next point.
        continueRunning = True
        world.send(continueRunning, dest=requestingRank)
        DataPoint.Isend(dest=requestingRank, world=world, systems=DataPoint.system)

    report = (nFinished % (world.size - 1)) == 0 or nFinished == nPoints

    if report:
        e = MPI.Wtime() - t0
        elapsed = str(timedelta(seconds=e))
        eta = str(timedelta(seconds=(nPoints / nFinished - 1) * e))
        myMPI.print("Remaining Points {}/{} || Elapsed Time: {} h:m:s || ETA {} h:m:s".format(nPoints-nFinished, nPoints, elapsed, eta))
Example 7
def masterTask(myData, world):
    """ Define a Send Recv Send procedure on the master """

    from mpi4py import MPI
    from geobipy.src.base import MPI as myMPI
    import numpy as np
    from numpy.random import randint

    mpi_status = MPI.Status()
    # Set the total number of data points

    N = myData.N

    # Create and shuffle an integer list of data point indices
    iTmp = np.arange(N)
    np.random.shuffle(iTmp)
    iList = []
    for i in range(N):
        iList.append([iTmp[i], 0])

    nFinished = 0
    nSent = 0
    dataSend = np.zeros(2, dtype=np.int64)
    rankRecv = np.zeros(3, dtype=np.int64)

    # Send out the first indices to the workers
    for iWorker in range(1, world.size):
        popped = iList.pop(randint(len(iList)))
        dataSend[:] = popped
        world.Send(dataSend, dest=iWorker, tag=run)
        nSent += 1

    # Start a timer
    t0 = MPI.Wtime()

    # Keep sending indices to the workers as they finish until the entire data set is processed
    while nFinished < N:
        # Wait for a worker to ping you
        world.Recv(rankRecv,
                   source=MPI.ANY_SOURCE,
                   tag=MPI.ANY_TAG,
                   status=mpi_status)

        # Check if the data point failed or not
        failed = False
        if mpi_status.Get_tag() == dpFailed:
            failed = True
            # If it failed, append the data point to the list
            if (rankRecv[2] < 3):  # If the number of runs is within the limit
                item = [rankRecv[1], rankRecv[2] + 1]
                iList.append(item)
            else:
                nFinished += 1
        elif (mpi_status.Get_tag() == dpWin):
            # If it won, increase the number of processed data
            nFinished += 1

        # Send out the next point if the list is not empty
        if (len(iList) > 0):
            popped = iList.pop(randint(len(iList)))
            dataSend[:] = popped
            world.Send(dataSend, dest=rankRecv[0], tag=run)
            nSent += 1
        else:
            dataSend[0] = -1
            world.Send(dataSend, dest=rankRecv[0], tag=killSwitch)

        if (not failed):
            elapsed = MPI.Wtime() - t0
            eta = (N / nFinished - 1) * elapsed
            myMPI.print(
                'Time: {:.3f} Sent: {} Finished: {} QueueLength: {}/{} ETA: {:.3f}'
                .format(elapsed, nSent, nFinished, len(iList), N, eta))
Example 8
from mpi4py import MPI
import numpy as np
from geobipy.src.base import MPI as myMPI

world = MPI.COMM_WORLD

rank = world.rank
master = rank == 0
size = world.size

#assert size == 4, Exception("Please use 4 cores to test")

dataPath = "..//documentation_source//source//examples//supplementary//Data//" 

x = 1
# Set up array sizes for consistency and chunk lengths per core
N = x * size + 1

starts, chunks = myMPI.loadBalance1D_shrinkingArrays(N, size)
i0 = starts[rank]
i1 = i0 + chunks[rank]

### Test base geobipy.MPI routines

# data type
dt = None
if master:
    x = np.full(rank+1, rank)
    myMPI._isendDtype(x, dest=1, world=world)
elif rank == 1:
    dt = myMPI._irecvDtype(source=0, world=world)


if not master:
Example 9
from mpi4py import MPI
from geobipy import DataPoint
from geobipy import FdemData
from geobipy import TdemData
from geobipy import FdemSystem
from geobipy import TdemSystem
from geobipy import FdemDataPoint
from geobipy import TdemDataPoint
from geobipy.src.base import MPI as myMPI

world = MPI.COMM_WORLD

rank = world.rank
master = rank == 0
size = world.size

myMPI.helloWorld(world)

x = 1
# Set up array sizes for consistency and chunk lengths per core
N = x * size + 1

starts, chunks = myMPI.loadBalance_shrinkingArrays(N, size)

myMPI.rankPrint(world, "start indices: {}".format(starts))
myMPI.rankPrint(world, "chunk sizes: {}".format(chunks))

### Test base geobipy.MPI routines

# data type
# dt = None
# if master:
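
Examples 8 and 9 rely on myMPI.loadBalance_shrinkingArrays (or loadBalance1D_shrinkingArrays) to split N items across size ranks. The sketch below is not geobipy's implementation, only a generic illustration of that kind of block decomposition, where the first N % size ranks receive one extra item:

import numpy as np

def loadBalanceSketch(N, size):
    """ Generic shrinking-array block decomposition (illustrative only). """
    # Every rank gets at least N // size items; the first N % size ranks get one extra.
    chunks = np.full(size, N // size, dtype=np.int64)
    chunks[:N % size] += 1
    # Starting index of each rank's chunk.
    starts = np.r_[0, np.cumsum(chunks)[:-1]]
    return starts, chunks

# For example, loadBalanceSketch(5, 4) returns starts [0 2 3 4] and chunks [2 1 1 1].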