예제 #1
0
def simple_device(verbose=False):
    """Exercise the ArrayFire device-management API on every device.

    Device information is printed through the verbosity-aware helpers
    obtained from ``_util``.  Each device index reported by
    ``af.get_device_count()`` is then activated in turn, and the function
    asserts that creating one array increases both the allocated and the
    locked buffer counts reported by ``af.device_mem_info()`` by exactly one.

    The device that was active on entry is re-activated before returning,
    even when one of the assertions fails.

    Parameters
    ----------
    verbose : bool
        Forwarded to ``_util.display_func`` and ``_util.print_func``.
    """
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)
    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    # Remember the caller's device under its own name: the loop below
    # rebinds ``dev`` on every iteration, so it cannot be used to restore.
    curr_dev = af.get_device()
    print_func(curr_dev)
    try:
        for k in range(af.get_device_count()):
            af.set_device(k)
            dev = af.get_device()
            assert (k == dev)

            print_func(af.is_dbl_supported(k))

            # Garbage-collect first so the buffer counts below only change
            # because of the array created in this iteration.
            af.device_gc()

            mem_info_old = af.device_mem_info()

            # ``a`` must stay referenced until the counts have been read.
            a = af.randu(100, 100)
            af.sync(dev)
            mem_info = af.device_mem_info()
            assert (mem_info['alloc']['buffers'] == 1 +
                    mem_info_old['alloc']['buffers'])
            assert (mem_info['lock']['buffers'] == 1 +
                    mem_info_old['lock']['buffers'])
    finally:
        af.set_device(curr_dev)
예제 #2
0
def simple_device(verbose=False):
    """Run the per-device memory bookkeeping checks of the device API.

    Prints general device information, then selects every device index in
    ``range(af.get_device_count())`` and asserts that allocating a single
    array adds exactly one entry to both the 'alloc' and the 'lock' buffer
    counters of ``af.device_mem_info()``.

    The device selected when the function was called is selected again on
    exit, including when an assertion aborts the loop.

    Parameters
    ----------
    verbose : bool
        Passed to the ``_util`` factories that build the output helpers.
    """
    display_func = _util.display_func(verbose)
    print_func   = _util.print_func(verbose)
    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    # Keep the entry device in a variable the loop never touches; ``dev``
    # is reassigned per iteration and ends up holding the last device id.
    curr_dev = af.get_device()
    print_func(curr_dev)
    try:
        for k in range(af.get_device_count()):
            af.set_device(k)
            dev = af.get_device()
            assert(k == dev)

            print_func(af.is_dbl_supported(k))

            # Collect unused buffers so the before/after counts are comparable.
            af.device_gc()

            mem_info_old = af.device_mem_info()

            # The array has to remain alive while the counters are sampled.
            a = af.randu(100, 100)
            af.sync(dev)
            mem_info = af.device_mem_info()
            assert(mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers'])
            assert(mem_info[ 'lock']['buffers'] == 1 + mem_info_old[ 'lock']['buffers'])
    finally:
        af.set_device(curr_dev)
예제 #3
0
def simple_device(verbose=False):
    """Walk through the ArrayFire device, memory and evaluation helpers.

    The function prints device information, checks the buffer counters of
    every device, re-selects the device that was active on entry, and then
    exercises device pointers, array locking, explicit evaluation and the
    manual-eval flag.

    Parameters
    ----------
    verbose : bool
        Forwarded to ``_util.display_func`` and ``_util.print_func``.
    """
    show = _util.display_func(verbose)
    emit = _util.print_func(verbose)

    for query in (af.device_info, af.get_device_count, af.is_dbl_supported):
        emit(query())
    af.sync()

    original_device = af.get_device()
    emit(original_device)

    for k in range(af.get_device_count()):
        af.set_device(k)
        active = af.get_device()
        assert k == active

        emit(af.is_dbl_supported(k))

        # Start from a collected state so only the new array is counted.
        af.device_gc()
        before = af.device_mem_info()

        arr_a = af.randu(100, 100)
        af.sync(active)
        after = af.device_mem_info()
        for pool in ('alloc', 'lock'):
            assert after[pool]['buffers'] == 1 + before[pool]['buffers']

    af.set_device(original_device)

    # Wrap the device pointer of one array in a second array object.
    arr_a = af.randu(10, 10)
    show(arr_a)
    raw_ptr = af.get_device_ptr(arr_a)
    emit(raw_ptr)
    arr_b = af.Array(
        src=raw_ptr,
        dims=arr_a.dims(),
        dtype=arr_a.dtype(),
        is_device=True,
    )
    show(arr_b)

    # Lock / unlock round trip.
    arr_c = af.randu(10, 10)
    af.lock_array(arr_c)
    af.unlock_array(arr_c)

    # Explicit evaluation of single arrays and of several arrays at once.
    arr_a = af.constant(1, 3, 3)
    arr_b = af.constant(2, 3, 3)
    af.eval(arr_a)
    af.eval(arr_b)
    emit(arr_a)
    emit(arr_b)
    arr_c = arr_a + arr_b
    arr_d = arr_a - arr_b
    af.eval(arr_c, arr_d)
    emit(arr_c)
    emit(arr_d)

    # Toggle the manual-eval flag and read it back each time.
    for wanted in (True, False):
        emit(af.set_manual_eval_flag(wanted))
        assert af.get_manual_eval_flag() == wanted

    show(af.is_locked_array(arr_a))
예제 #4
0
def gpuGridrec(tomo,angles,center,input_params):
    """
    Gridrec reconstruction using GPU based gridding

    Slices are reconstructed two at a time: even-indexed slices are packed
    into the real part and odd-indexed slices into the imaginary part of one
    complex sinogram, so the input must contain an even number of slices.

    Inputs: tomo : 3D numpy sinogram array with dimensions same as tomopy
    angles : Array of angles in radians
    center : Floating point center of rotation
    input_params : A dictionary with the keys
    'gpu_device' : Device id of the gpu (For a 4 GPU cluster ; 0-3)
    'oversamp_factor': A factor by which to pad the image/data for FFT
    'fbp_filter_param' : A number between 0-1 for setting the filter cut-off for FBP

    Returns: float32 numpy array of shape (num_slice, im_size, im_size)

    Raises: ValueError if the number of slices is odd
    """

    print('Starting GPU NUFFT recon')
    af.set_device(input_params['gpu_device']) #Set the device number for gpu based code
    #Change tomopy format
    new_tomo = np.transpose(tomo,(1,2,0)) #slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    if num_slice % 2:
        # Pairing slice 2k with slice 2k+1 below needs a partner for every slice.
        raise ValueError('gpuGridrec needs an even number of slices, got %d'
                         % num_slice)
    num_pairs = num_slice // 2
    pad_size = np.int16(im_size*input_params['oversamp_factor'])

    # Floor division keeps every size and index an integer on Python 3 as
    # well; on Python 2 it yields the same values the original "/" produced.
    half_pad = int(pad_size) // 2
    half_orig = im_size // 2

    #Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size #Sinogram size after padding
    sino['Ns_orig'] = im_size #size of original sinogram
    sino['center'] = center + (half_pad - half_orig) #for padded sinogram
    sino['angles'] = angles
    sino['filter'] = input_params['fbp_filter_param'] #Parameter to control strength of FBP filter normalized to [0,1]

    #Initialize NUFFT parameters
    nufft_params = init_nufft_params(sino,geom)
    rec_nufft = afnp.zeros((num_pairs,sino['Ns_orig'],sino['Ns_orig']),dtype=afnp.complex64)
    Ax = afnp.zeros((sino['Ns'],num_angles),dtype=afnp.complex64)
    # Window of the padded axis that holds the original (unpadded) data.
    pad_idx = slice(half_pad - half_orig, half_pad + half_orig)
    rec_nufft_final = np.zeros((num_slice,sino['Ns_orig'],sino['Ns_orig']),dtype=np.float32)

    #Move all data to GPU
    slice_1 = slice(0,num_slice,2)
    slice_2 = slice(1,num_slice,2)
    gdata = afnp.array(new_tomo[slice_1]+1j*new_tomo[slice_2],dtype=afnp.complex64)
    #loop over all slice pairs
    for i in range(num_pairs):
        Ax[pad_idx,:] = gdata[i]
        #filtered back-projection
        rec_nufft[i] = (back_project(Ax,nufft_params))[pad_idx,pad_idx]

    #Move to CPU
    # No rescaling is applied in this variant of the routine.
    rec_nufft = np.array(rec_nufft,dtype=np.complex64)
    rec_nufft_final[slice_1] = np.array(rec_nufft.real,dtype=np.float32)
    rec_nufft_final[slice_2] = np.array(rec_nufft.imag,dtype=np.float32)
    return rec_nufft_final
예제 #5
0
def main():
    """Parse the command line, select backend/device and run one demo.

    Returns ``-1`` after printing the help text when the dataset or demo
    type is not one of the handled values; otherwise returns ``None``.
    """
    parser = argparse.ArgumentParser(
        description='af vs sklearn logit comparison')

    # (flags, keyword arguments) for every option, in registration order.
    option_table = (
        (('-b', '--backend'),
         dict(choices=['default', 'cpu', 'cuda', 'opencl'],
              default='default',
              action='store',
              help='ArrayFire backend to be used')),
        (('-v', '--device'),
         dict(type=int,
              default=0,
              action='store',
              help='ArrayFire backend device to be used')),
        (('-d', '--dataset'),
         dict(choices=['iris', 'mnist', 'notmnist'],
              default='iris',
              action='store',
              help='Dataset to be used')),
        (('-t', '--type'),
         dict(choices=['simple', 'predict', 'benchmark'],
              default='simple',
              action='store',
              help='Demo type')),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    args = parser.parse_args()

    af.set_backend(args.backend)
    af.set_device(args.device)

    af.info()

    # Load the requested dataset.
    chosen = args.dataset
    if chosen == 'iris':
        dataset = read_and_preprocess_iris_data()
    elif chosen == 'mnist':
        dataset = read_and_preprocess_mnist_data()
    elif chosen == 'notmnist':
        dataset = read_and_preprocess_notmnist_data()
    else:
        parser.print_help()
        return -1

    print('------------')

    # Run the requested kind of demo on that dataset.
    mode = args.type
    if mode == 'simple':
        demo_simple(arrayfire_knn_demo, sklearn_knn_demo, dataset)
    elif mode == 'predict':
        demo_pred(arrayfire_knn_demo, sklearn_knn_demo, dataset)
    elif mode == 'benchmark':
        demo_bench(arrayfire_knn_demo, sklearn_knn_demo, dataset)
    else:
        parser.print_help()
        return -1
예제 #6
0
파일: device.py 프로젝트: eeeedgar/model_1
    def _get_compute_device_internal(id: int) -> ComputeDevice:
        """Activate device ``id`` and describe it as a ``ComputeDevice``.

        The description is built from the 'device', 'backend', 'toolkit'
        and 'compute' entries of ``af.device_info()``.  The device is left
        active; restoring the previous one is up to the caller.
        """
        af.set_device(id)
        info = af.device_info()
        return ComputeDevice(
            id,
            info['device'],
            info['backend'],
            info['toolkit'],
            info['compute'],
        )
예제 #7
0
파일: device.py 프로젝트: eeeedgar/model_1
    def set_compute_device(compute_device: tp.Union[int, ComputeDevice]) \
            -> None:
        """Make the given device the active ArrayFire device.

        An ``int`` is resolved through
        ``ComputeDeviceManager.get_compute_device``; a ``ComputeDevice`` is
        used directly.  Any other argument type raises ``TypeError``.
        """
        if isinstance(compute_device, int):
            target = ComputeDeviceManager.get_compute_device(compute_device)
        elif isinstance(compute_device, ComputeDevice):
            target = compute_device
        else:
            raise TypeError(f"The argument compute_device must be of "
                            f"type ComputeDevice or of type int. The argument "
                            f"provided is of type {type(compute_device)}")

        af.set_device(target.id)
예제 #8
0
파일: device.py 프로젝트: eeeedgar/model_1
    def get_compute_devices(cls) -> tp.Sequence[ComputeDevice]:
        """Return the cached list of compute devices, building it on first use.

        Building the list activates every device index in
        ``range(af.get_device_count())`` in turn.  The device that was
        current beforehand is re-activated afterwards, also when querying
        one of the devices raises.  The cache is only assigned once every
        device has been queried, so a failed enumeration is retried on the
        next call instead of leaving a partial list behind.
        """
        if ComputeDeviceManager._compute_devices is None:
            saved_device_id = cls.get_current_compute_device_id()
            describe = ComputeDeviceManager._get_compute_device_internal

            try:
                devices = [describe(device_id)
                           for device_id in range(af.get_device_count())]
            finally:
                # Querying switches the active device; always switch back.
                af.set_device(saved_device_id)

            ComputeDeviceManager._compute_devices = devices

        return ComputeDeviceManager._compute_devices
def main():
    """Run the logit demo with optional positional command-line arguments.

    ``argv[1]`` is the device index (default 0), ``argv[2]`` enables console
    mode when it starts with '-', and ``argv[3]`` is the percentage passed
    to ``logit_demo`` (default 60).  Exceptions raised while selecting the
    device or running the demo are printed instead of propagated.
    """
    cli = sys.argv
    given = len(cli)

    device = int(cli[1]) if given > 1 else 0
    console = given > 2 and cli[2][0] == '-'
    perc = int(cli[3]) if given > 3 else 60

    try:
        af.set_device(device)
        af.info()
        logit_demo(console, perc)
    except Exception as err:
        print('Error: ', str(err))
예제 #10
0
def gpuGridrec(tomo, angles, center, input_params):
    """Reconstruct a stack of sinograms with GPU-based gridding (NUFFT).

    Slices are handled in pairs: even-indexed slices form the real part and
    odd-indexed slices the imaginary part of one complex sinogram, so a
    single back-projection reconstructs two slices.  The result is
    multiplied by ``(pi / pad_size)**2`` before it is returned.

    Parameters
    ----------
    tomo : 3D numpy array
        Sinogram data; transposed internally with axes ``(1, 2, 0)`` into
        (slice, columns, angles) order.
    angles : array
        Stored as ``sino['angles']`` for the NUFFT set-up.
    center : float
        Center of rotation in the unpadded sinogram.
    input_params : dict
        Must provide 'gpu_device', 'oversamp_factor' and 'fbp_filter_param'.

    Returns
    -------
    numpy.ndarray
        float32 array of shape (num_slice, im_size, im_size).

    Raises
    ------
    ValueError
        If the number of slices is odd.
    """
    print('Starting GPU NUFFT recon')
    af.set_device(
        input_params['gpu_device'])  #Set the device number for gpu based code
    #Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  #slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    if num_slice % 2:
        # Every even slice needs an odd partner for the complex packing.
        raise ValueError('gpuGridrec needs an even number of slices, got %d'
                         % num_slice)
    num_pairs = num_slice // 2
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    nufft_scaling = (np.pi / pad_size)**2

    # Floor division keeps sizes and indices integral on Python 3 too; on
    # Python 2 the values are the same as with the original "/".
    half_pad = int(pad_size) // 2
    half_orig = im_size // 2

    #Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  #Sinogram size after padding
    sino['Ns_orig'] = im_size  #size of original sinogram
    sino['center'] = center + (half_pad - half_orig)  #for padded sinogram
    sino['angles'] = angles
    #Parameter to control strength of FBP filter normalized to [0,1]
    sino['filter'] = input_params['fbp_filter_param']

    #Initialize NUFFT parameters
    nufft_params = init_nufft_params(sino, geom)
    rec_nufft = afnp.zeros((num_pairs, sino['Ns_orig'], sino['Ns_orig']),
                           dtype=afnp.complex64)
    Ax = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    # Window of the padded axis that holds the original (unpadded) data.
    pad_idx = slice(half_pad - half_orig, half_pad + half_orig)
    rec_nufft_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']),
                               dtype=np.float32)

    #Move all data to GPU
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2],
                       dtype=afnp.complex64)
    #loop over all slice pairs
    for i in range(num_pairs):
        Ax[pad_idx, :] = gdata[i]
        #filtered back-projection
        rec_nufft[i] = (back_project(Ax, nufft_params))[pad_idx, pad_idx]

    #Move to CPU
    #Rescale result to match tomopy
    rec_nufft = np.array(rec_nufft, dtype=np.complex64) * nufft_scaling
    rec_nufft_final[slice_1] = np.array(rec_nufft.real, dtype=np.float32)
    rec_nufft_final[slice_2] = np.array(rec_nufft.imag, dtype=np.float32)
    return rec_nufft_final
예제 #11
0
def simple_device(verbose=False):
    """Check device selection, buffer accounting and device-pointer locking.

    Prints device information, verifies on every device that one new array
    adds one allocated and one locked buffer, re-selects the device that
    was active on entry, and finally wraps an array's device pointer in a
    second array that is locked and unlocked.

    Parameters
    ----------
    verbose : bool
        Forwarded to ``_util.display_func`` and ``_util.print_func``.
    """
    show = _util.display_func(verbose)
    emit = _util.print_func(verbose)

    emit(af.device_info())
    emit(af.get_device_count())
    emit(af.is_dbl_supported())
    af.sync()

    entry_device = af.get_device()
    emit(entry_device)

    device_count = af.get_device_count()
    for k in range(device_count):
        af.set_device(k)
        selected = af.get_device()
        assert k == selected

        emit(af.is_dbl_supported(k))

        # Collect unused buffers so the counters only reflect the new array.
        af.device_gc()
        baseline = af.device_mem_info()

        arr_a = af.randu(100, 100)
        af.sync(selected)
        current = af.device_mem_info()

        expected_alloc = 1 + baseline['alloc']['buffers']
        assert current['alloc']['buffers'] == expected_alloc
        expected_lock = 1 + baseline['lock']['buffers']
        assert current['lock']['buffers'] == expected_lock

    af.set_device(entry_device)

    arr_a = af.randu(10, 10)
    show(arr_a)
    raw_ptr = af.get_device_ptr(arr_a)
    emit(raw_ptr)
    arr_b = af.Array(
        src=raw_ptr,
        dims=arr_a.dims(),
        dtype=arr_a.dtype(),
        is_device=True,
    )
    show(arr_b)
    af.lock_device_ptr(arr_b)
    af.unlock_device_ptr(arr_b)
예제 #12
0
def simple_device(verbose=False):
    """Smoke-test the device API: info, per-device buffers, pointer locks.

    For every device index the function asserts that creating one array
    raises the 'alloc' and 'lock' buffer counters by one each.  Afterwards
    the device that was active on entry is selected again and an array
    built from a raw device pointer is locked and unlocked.

    Parameters
    ----------
    verbose : bool
        Passed to the ``_util`` factories that build the output helpers.
    """
    display = _util.display_func(verbose)
    report = _util.print_func(verbose)

    for getter in (af.device_info, af.get_device_count, af.is_dbl_supported):
        report(getter())
    af.sync()

    start_device = af.get_device()
    report(start_device)

    for k in range(af.get_device_count()):
        af.set_device(k)
        now_active = af.get_device()
        assert k == now_active

        report(af.is_dbl_supported(k))

        # Drop unused buffers first; the comparison below relies on it.
        af.device_gc()
        counts_before = af.device_mem_info()

        arr_a = af.randu(100, 100)
        af.sync(now_active)
        counts_after = af.device_mem_info()

        for kind in ('alloc', 'lock'):
            assert counts_after[kind]['buffers'] == 1 + counts_before[kind]['buffers']

    af.set_device(start_device)

    arr_a = af.randu(10, 10)
    display(arr_a)
    pointer = af.get_device_ptr(arr_a)
    report(pointer)
    arr_b = af.Array(src=pointer,
                     dims=arr_a.dims(),
                     dtype=arr_a.dtype(),
                     is_device=True)
    display(arr_b)
    af.lock_device_ptr(arr_b)
    af.unlock_device_ptr(arr_b)
예제 #13
0
    def __init__(self, physical_system, performance_test_flag = False):
        """
        Constructor for the nonlinear_solver object. It takes the physical
        system object as an argument and uses it in initialization and
        evolution of the system in consideration.

        Additionally, a performance test flag is also passed which when true,
        stores time which is consumed by each of the major solver routines.
        This proves particularly useful in analyzing performance bottlenecks
        and obtaining benchmarks.

        Parameters:
        -----------

        physical_system: object
                         The defined physical system object which holds
                         all the simulation information such as the initial
                         conditions, and the domain info is passed as an
                         argument in defining an instance of the
                         nonlinear_solver. This system is then evolved, and
                         monitored using the various methods under the
                         nonlinear_solver class.

        performance_test_flag: bool
                               When set to true, the time elapsed in each of the
                               solver routines is measured. These performance
                               stats can be obtained at the end of the run using
                               the command print_performance_timings, which summarizes
                               the results in a table.

        Raises:
        -------

        Exception
            When 'periodic', 'shearing-box' or 'none' is requested on the
            left (bottom) boundary of an axis but not on the matching
            right (top) boundary.
        """
        self.physical_system = physical_system

        # Holding Domain Info:
        self.q1_start, self.q1_end = physical_system.q1_start,\
                                     physical_system.q1_end
        self.q2_start, self.q2_end = physical_system.q2_start,\
                                     physical_system.q2_end
        self.p1_start, self.p1_end = physical_system.p1_start,\
                                     physical_system.p1_end
        self.p2_start, self.p2_end = physical_system.p2_start,\
                                     physical_system.p2_end
        self.p3_start, self.p3_end = physical_system.p3_start,\
                                     physical_system.p3_end

        # Holding Domain Resolution:
        self.N_q1, self.dq1 = physical_system.N_q1, physical_system.dq1
        self.N_q2, self.dq2 = physical_system.N_q2, physical_system.dq2
        self.N_p1, self.dp1 = physical_system.N_p1, physical_system.dp1
        self.N_p2, self.dp2 = physical_system.N_p2, physical_system.dp2
        self.N_p3, self.dp3 = physical_system.N_p3, physical_system.dp3

        # Getting number of ghost zones, and the boundary
        # conditions that are utilized:
        N_g = self.N_ghost       = physical_system.N_ghost
        self.boundary_conditions = physical_system.boundary_conditions

        # MPI Communicator:
        self._comm = self.physical_system.mpi_communicator

        # With several devices available, each rank selects its device
        # either from the user supplied table or round-robin by rank:
        if(self.physical_system.params.num_devices>1):
            rank = self._comm.rank
            if (self.physical_system.params.manual_device_allocation == True):
                af.set_device(self.physical_system.params.device_allocation[rank])
            else:
                af.set_device(rank%self.physical_system.params.num_devices)

        # Getting number of species:
        N_s = self.N_species = self.physical_system.N_species

        # TODO: Remove mass and charge from lib
        if(isinstance(physical_system.params.mass, list)):
            # Having a temporary copy of the lists to copy to af.Array:
            list_mass   = physical_system.params.mass.copy()
            list_charge = physical_system.params.charge.copy()

            # Initializing af.Arrays for mass and charge:
            # Having the mass and charge along axis 1:
            self.physical_system.params.mass   = af.constant(0, 1, N_s, dtype = af.Dtype.f64)
            self.physical_system.params.charge = af.constant(0, 1, N_s, dtype = af.Dtype.f64)

            for i in range(N_s):
                self.physical_system.params.mass[0, i]   = list_mass[i]
                self.physical_system.params.charge[0, i] = list_charge[i]

        self.physical_system.params.rank = self._comm.rank

        PETSc.Sys.Print('\nBackend Details for Nonlinear Solver:')
        # Printing the backend details for each rank/device/node:
        PETSc.Sys.syncPrint(indent('Rank ' + str(self._comm.rank) + ' of ' + str(self._comm.size-1)))
        PETSc.Sys.syncPrint(indent('On Node: '+ socket.gethostname()))
        PETSc.Sys.syncPrint(indent('Device Details:'))
        PETSc.Sys.syncPrint(indent(af.info_str(), 2))
        PETSc.Sys.syncPrint(indent('Device Bandwidth = ' + str(bandwidth_test(100)) + ' GB / sec'))
        PETSc.Sys.syncPrint()
        PETSc.Sys.syncFlush()

        self.performance_test_flag = performance_test_flag

        # Initializing variables which are used to time the components of the solver:
        if(performance_test_flag == True):

            self.time_ts = 0

            self.time_interp2  = 0
            self.time_sourcets = 0

            self.time_fvm_solver  = 0
            self.time_reconstruct = 0
            self.time_riemann     = 0

            self.time_fieldstep = 0
            self.time_interp3   = 0

            self.time_apply_bcs_f   = 0
            self.time_communicate_f = 0

        petsc_bc_in_q1 = 'ghosted'; self.N_g1 = self.N_ghost
        petsc_bc_in_q2 = 'ghosted'; self.N_g2 = self.N_ghost

        # Only for periodic boundary conditions or shearing-box boundary conditions
        # do the boundary conditions passed to the DA need to be changed. PETSc
        # automatically handles the application of periodic boundary conditions when
        # running in parallel. For shearing box boundary conditions, an interpolation
        # operation needs to be applied on top of the periodic boundary conditions.
        # In all other cases, ghosted boundaries are used.

        if(self.boundary_conditions.in_q1_left in ('periodic', 'shearing-box')):
            petsc_bc_in_q1 = 'periodic'

        if(self.boundary_conditions.in_q2_bottom in ('periodic', 'shearing-box')):
            petsc_bc_in_q2 = 'periodic'

        if(self.boundary_conditions.in_q1_left == 'none'):
            petsc_bc_in_q1 = 'none'; self.N_g1 = 0

        if(self.boundary_conditions.in_q2_bottom == 'none'):
            petsc_bc_in_q2 = 'none'; self.N_g2 = 0

        # Validating that paired boundaries agree. Explicit checks are used
        # instead of assert so the validation is not dropped when Python
        # runs with -O, and unrelated errors are not masked:
        if(self.boundary_conditions.in_q1_left == 'periodic'):
            if(self.boundary_conditions.in_q1_right != 'periodic'):
                raise Exception('Periodic boundary conditions need to be applied to \
                                 both the boundaries of a particular axis'
                               )

        if(self.boundary_conditions.in_q1_left == 'shearing-box'):
            if(self.boundary_conditions.in_q1_right != 'shearing-box'):
                raise Exception('Shearing box boundary conditions need to be applied to \
                                 both the boundaries of a particular axis'
                               )

        if(self.boundary_conditions.in_q2_bottom == 'periodic'):
            if(self.boundary_conditions.in_q2_top != 'periodic'):
                raise Exception('Periodic boundary conditions need to be applied to \
                                 both the boundaries of a particular axis'
                               )

        if(self.boundary_conditions.in_q2_bottom == 'shearing-box'):
            if(self.boundary_conditions.in_q2_top != 'shearing-box'):
                raise Exception('Shearing box boundary conditions need to be applied to \
                                 both the boundaries of a particular axis'
                               )

        if(self.boundary_conditions.in_q1_left == 'none'):
            if(self.boundary_conditions.in_q1_right != 'none'):
                raise Exception('NONE boundary conditions need to be applied to \
                                 both the boundaries of a particular axis'
                               )

        if(self.boundary_conditions.in_q2_bottom == 'none'):
            if(self.boundary_conditions.in_q2_top != 'none'):
                raise Exception('NONE boundary conditions need to be applied to \
                                 both the boundaries of a particular axis'
                               )

        self._nproc_in_q1 = PETSc.DECIDE
        self._nproc_in_q2 = PETSc.DECIDE

        # Break up the domain into manually defined portions
        self._ownership_ranges = None
        if self.physical_system.params.enable_manual_domain_decomposition:
            ownership_q1 = [self.N_q1*item for item in self.physical_system.params.q1_partition]
            ownership_q2 = [self.N_q2*item for item in self.physical_system.params.q2_partition]
            self._ownership_ranges = (ownership_q1, ownership_q2)
        # TODO : Implement error handling and give clean messages

        # Since shearing boundary conditions require interpolations which are non-local:
        if(self.boundary_conditions.in_q2_bottom == 'shearing-box'):
            self._nproc_in_q1 = 1

        if(self.boundary_conditions.in_q1_left == 'shearing-box'):
            self._nproc_in_q2 = 1

        # DMDA is a data structure to handle a distributed structure
        # grid and its related core algorithms. It stores metadata of
        # how the grid is partitioned when run in parallel which is
        # utilized by the various methods of the solver.
        self._da_f = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                         dof           = (  self.N_species
                                                          * self.N_p1
                                                          * self.N_p2
                                                          * self.N_p3
                                                         ),
                                         stencil_width = N_g,
                                         boundary_type = (petsc_bc_in_q1,
                                                          petsc_bc_in_q2
                                                         ),
                                         proc_sizes    = (self._nproc_in_q1,
                                                          self._nproc_in_q2
                                                         ),
                                         ownership_ranges = self._ownership_ranges,
                                         stencil_type  = 1,
                                         comm          = self._comm
                                        )
        lx, ly = self._da_f.getOwnershipRanges()

        # This DA is used by the FileIO routine dump_moments():
        # Finding the number of definitions for the moments:
        attributes = [a for a in dir(self.physical_system.moments) if not a.startswith('_')]

        # Removing utility functions:
        if('integral_over_v' in attributes):
            attributes.remove('integral_over_v')

        self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                                    dof        =   self.N_species
                                                            * (len(attributes)-2), # don't count integral_over_p, and params in moments.py
                                                    proc_sizes = (self._nproc_in_q1,
                                                                  self._nproc_in_q2
                                                                 ),
                                                    ownership_ranges = self._ownership_ranges,
                                                    comm       = self._comm
                                                   )

        # For dumping aux arrays:
        self.dump_aux_arrays_initial_call = 1

        # Creation of the local and global vectors from the DA:
        # This is for the distribution function
        self._glob_f  = self._da_f.createGlobalVec()
        self._local_f = self._da_f.createLocalVec()

        # The following vector is used to dump the data to file:
        self._glob_moments = self._da_dump_moments.createGlobalVec()

        # Getting the arrays for the above vectors:
        self._glob_f_array       = self._glob_f.getArray()
        self._local_f_array      = self._local_f.getArray()
        self._glob_moments_array = self._glob_moments.getArray()

        # Setting names for the objects which will then be
        # used as the key identifiers for the HDF5 files:
        PETSc.Object.setName(self._glob_f, 'distribution_function')
        PETSc.Object.setName(self._glob_moments, 'moments')

        # Indexing vars used through out
        self.i_q1_start = self.N_g1; self.i_q1_end = -self.N_g1
        self.i_q2_start = self.N_g2; self.i_q2_end = -self.N_g2
        # NOTE(review): without ghost zones the end index is set to 1
        # instead of -0; confirm against the code that consumes i_q*_end.
        if (self.N_g1 == 0):
            self.i_q1_end = 1

        if (self.N_g2 == 0):
            self.i_q2_end = 1

        # Get start (corner) indices of the local zone wrt global ordering and its size
        ((_i_q1_start, _i_q2_start), (_N_q1_local, _N_q2_local)) = self._da_f.getCorners()

        # Coordinates of the local zone in a global coord system
        self.q1_start_local = self.q1_start + _i_q1_start * self.dq1
        self.q2_start_local = self.q2_start + _i_q2_start * self.dq2

        # TODO : Fix this. Passing into params for use in coords.py
        self.physical_system.params.q1_start_local_left   = self.q1_start_local
        self.physical_system.params.q2_start_local_bottom = self.q2_start_local

        print("nonlinear.py: rank = ", self._comm.rank,
              "(q1_start_local, q2_start_local) =  (",
              self.q1_start_local, self.q2_start_local, ")"
             )
        print("nonlinear.py: rank = ", self._comm.rank,
              "(N_q1_local, N_q2_local) =  (",
              _N_q1_local, _N_q2_local, ")"
             )
        print("nonlinear.py: rank = ", self._comm.rank,
              "ownership_ranges : lx = ", lx, "ly = ", ly
             )

        self.N_q1_local = _N_q1_local
        self.N_q2_local = _N_q2_local

        self.N_q1_local_with_Ng = _N_q1_local + 2*self.N_g1
        self.N_q2_local_with_Ng = _N_q2_local + 2*self.N_g2

        # Obtaining the array values of spatial coordinates:
        q_left_bot, q_center_bot, q_left_center, q_center = \
            calculate_q(self.q1_start_local,
                        self.q2_start_local,
                        self.N_q1_local, self.N_q2_local,
                        self.N_g1, self.N_g2,
                        self.dq1, self.dq2
                       )

        self.q1_left_bot    = q_left_bot[0]
        self.q2_left_bot    = q_left_bot[1]

        self.q1_center_bot  = q_center_bot[0]
        self.q2_center_bot  = q_center_bot[1]

        self.q1_left_center = q_left_center[0]
        self.q2_left_center = q_left_center[1]

        self.q1_center      = q_center[0]
        self.q2_center      = q_center[1]

        self.p1_center, self.p2_center, self.p3_center = \
            calculate_p_center(self.p1_start, self.p2_start, self.p3_start,
                               self.N_p1, self.N_p2, self.N_p3,
                               self.dp1, self.dp2, self.dp3,
                              )

        self.p1_left, self.p2_bottom, self.p3_back = \
            calculate_p_corner(self.p1_start, self.p2_start, self.p3_start,
                               self.N_p1, self.N_p2, self.N_p3,
                               self.dp1, self.dp2, self.dp3,
                              )

        # Need to convert the lists dp1, dp2, dp3 to af.Arrays for vector
        # computations to work
        self.dp1 = af.moddims(af.to_array(np.array(self.dp1)), 1, self.N_species)
        self.dp2 = af.moddims(af.to_array(np.array(self.dp2)), 1, self.N_species)
        self.dp3 = af.moddims(af.to_array(np.array(self.dp3)), 1, self.N_species)

        # Need to do the same for the p1_start/end lists.
        self.p1_start = af.moddims(af.to_array(self.p1_start), 1, self.N_species)
        self.p2_start = af.moddims(af.to_array(self.p2_start), 1, self.N_species)
        self.p3_start = af.moddims(af.to_array(self.p3_start), 1, self.N_species)

        self.p1_end = af.moddims(af.to_array(self.p1_end), 1, self.N_species)
        self.p2_end = af.moddims(af.to_array(self.p2_end), 1, self.N_species)
        self.p3_end = af.moddims(af.to_array(self.p3_end), 1, self.N_species)

        # The remaining face coordinates reuse the cell-centered arrays:
        self.p2_left = self.p2_center
        self.p3_left = self.p3_center

        self.p1_bottom = self.p1_center
        self.p3_bottom = self.p3_center

        self.p1_back = self.p1_center
        self.p2_back = self.p2_center

        # Initialize according to initial condition provided by user:
        self._initialize(physical_system.params)

        # Initializing a variable to track time-elapsed:
        self.time_elapsed = 0

        # Assigning the function objects to methods of the solver:
        self._A_q = physical_system.A_q
        self._C_q = physical_system.C_q
        self._A_p = physical_system.A_p
        self._C_p = physical_system.C_p

        # Source/Sink term:
        self._source = physical_system.source
예제 #14
0
  # Benchmark options; the default of each one is repeated in its help text.
  parser.add_argument("--N", help="Amount of measurements taken. Default: 10",default=10,type=int)
  parser.add_argument("--sigma", help="Sigma of gaussian filter. Default: 1.0",default=1.0,type=float)
  parser.add_argument("--size", help="Size of squared image. Default: 1024",default=1024,type=int)
  parser.add_argument("--raw", help="Produce raw output for plot data.",action="store_true")
  parser.add_argument("--device", help="Select GPU device number. Default: 0",default=0,type=int)

  args = parser.parse_args()

  # Echo the chosen parameters unless raw output was requested.
  # NOTE(review): these are Python 2 print statements.
  if not args.raw:
    print "--- Parameters ---"
    print "\tSize:",args.size
    print "\tN:",args.N
    print "\tSigma:",args.sigma
    print "\tDevice:",args.device

  # Select the ArrayFire device given on the command line.
  af.set_device(args.device)

  # create input image
  img = np.random.random((args.size,args.size))

  # create arrayfire gaussiankernel
  # The kernel construction is timed on its own (stored in af_kernel).
  # NOTE(review): time.clock() was removed in Python 3.8; a port needs
  # time.perf_counter() or similar.
  start = time.clock()
  afsmk = af_gaussian2D(args.sigma)
  end = time.clock()
  af_kernel = end-start

  # storage for times
  # One slot per measurement (args.N); presumably host->device copy,
  # convolution, device->host copy and a vigra reference run, judging by
  # the names -- confirm against the code that fills them.
  af_cpy_hd = np.zeros(args.N)
  af_convolve = np.zeros(args.N)
  af_cpy_dh = np.zeros(args.N)
  vigra_t = np.zeros(args.N)
예제 #15
0
import arrayfire as af
import afnumpy as afnp
import numpy as np
import sys
from gnufft import tvd_update, add_hessian
import tomopy
import pyqtgraph as pg
import time
import arrayfire as af

# Benchmark script: times one call of gnufft's tvd_update on a random volume.

# Select the GPU device used for this run.
af.set_device(2)
nslice = 150
im_size = 2560
# Alternative test volumes:
#obj = np.ones((nslice,im_size,im_size),dtype=np.float32)
#obj=tomopy.shepp3d((nslice,im_size,im_size),dtype=np.float32)
obj = np.random.rand(nslice, im_size, im_size).astype(np.float32)

# Pack pairs of real slices into a single complex volume: even slices become
# the real part and odd slices the imaginary part.
x = obj[::2]
y = obj[1::2]
print(x.shape)
vol = x + 1j * y

t = time.time()
vol = afnp.array(vol.astype(np.complex64))  #255*
# Floor division keeps the leading dimension an integer; `nslice / 2` is a
# float under Python 3 and is not a valid array dimension.
fcn = afnp.zeros((nslice // 2, im_size, im_size), dtype=np.complex64)
# NOTE(review): tvd_update presumably writes its result into `fcn` in place,
# since `fcn` is read back below -- confirm against gnufft.
tvd_update(1.2, 1, vol, fcn)
elapsed = time.time() - t
print('Time taken for gradient %f' % (elapsed))

# Unpack the complex result back into interleaved real slices. The device
# array is converted to a host array once and reused for both parts.
output = np.zeros((nslice, im_size, im_size), dtype=np.float32)
fcn_host = np.array(fcn)
output[::2] = fcn_host.real
output[1::2] = fcn_host.imag
print(output.max())
print(output.min())
예제 #16
0
    def __init__(self, physical_system, performance_test_flag=False):
        """
        Constructor for the nonlinear_solver object. It takes the physical
        system object as an argument and uses it in initialization and
        evolution of the system in consideration.

        Additionally, a performance test flag is also passed which when true
        stores time which is consumed by each of the major solver routines.
        This proves particularly useful in analyzing performance bottlenecks
        and obtaining benchmarks.

        Parameters:
        -----------

        physical_system: The defined physical system object which holds
                         all the simulation information such as the initial
                         conditions, and the domain info is passed as an
                         argument in defining an instance of the
                         nonlinear_solver. This system is then evolved, and
                         monitored using the various methods under the
                         nonlinear_solver class.

        performance_test_flag: When equal to True, the timing accumulators
                               (the self.time_* attributes) are created and
                               set to zero. Defaults to False.
        """
        self.physical_system = physical_system

        # Holding Domain Info:
        self.q1_start, self.q1_end = physical_system.q1_start,\
                                     physical_system.q1_end
        self.q2_start, self.q2_end = physical_system.q2_start,\
                                     physical_system.q2_end
        self.p1_start, self.p1_end = physical_system.p1_start,\
                                     physical_system.p1_end
        self.p2_start, self.p2_end = physical_system.p2_start,\
                                     physical_system.p2_end
        self.p3_start, self.p3_end = physical_system.p3_start,\
                                     physical_system.p3_end

        # Holding Domain Resolution:
        self.N_q1, self.dq1 = physical_system.N_q1, physical_system.dq1
        self.N_q2, self.dq2 = physical_system.N_q2, physical_system.dq2
        self.N_p1, self.dp1 = physical_system.N_p1, physical_system.dp1
        self.N_p2, self.dp2 = physical_system.N_p2, physical_system.dp2
        self.N_p3, self.dp3 = physical_system.N_p3, physical_system.dp3

        # Getting number of ghost zones, and the boundary
        # conditions that are utilized:
        N_g = self.N_ghost = physical_system.N_ghost
        self.boundary_conditions = physical_system.boundary_conditions

        # Declaring the communicator:
        self._comm = PETSc.COMM_WORLD.tompi4py()

        # When several devices are available, the ranks are distributed
        # over them in a round-robin manner:
        if (self.physical_system.params.num_devices > 1):
            af.set_device(self._comm.rank %
                          self.physical_system.params.num_devices)

        PETSc.Sys.Print('\nBackend Details for Nonlinear Solver:')

        # Printing the backend details for each rank/device/node:
        PETSc.Sys.syncPrint(
            indent('Rank ' + str(self._comm.rank) + ' of ' +
                   str(self._comm.size - 1)))
        PETSc.Sys.syncPrint(indent('On Node: ' + socket.gethostname()))
        PETSc.Sys.syncPrint(indent('Device Details:'))
        PETSc.Sys.syncPrint(indent(af.info_str(), 2))
        PETSc.Sys.syncPrint(
            indent('Device Bandwidth = ' + str(bandwidth_test(100)) +
                   ' GB / sec'))
        PETSc.Sys.syncPrint()
        PETSc.Sys.syncFlush()

        self.performance_test_flag = performance_test_flag

        # Initializing the variables which accumulate the time
        # spent in the components of the solver:
        if (performance_test_flag == True):

            self.time_ts = 0

            self.time_interp2 = 0
            self.time_sourcets = 0

            self.time_fvm_solver = 0
            self.time_reconstruct = 0
            self.time_riemann = 0

            self.time_fieldstep = 0
            self.time_fieldsolver = 0
            self.time_interp3 = 0

            self.time_apply_bcs_f = 0
            self.time_apply_bcs_fields = 0

            self.time_communicate_f = 0
            self.time_communicate_fields = 0

        petsc_bc_in_q1 = 'ghosted'
        petsc_bc_in_q2 = 'ghosted'

        # Only for periodic boundary conditions do the boundary
        # conditions passed to the DA need to be changed. PETSc
        # automatically handles the application of periodic
        # boundary conditions when running in parallel. In all other
        # cases, ghosted boundaries are used.

        if (self.boundary_conditions.in_q1_left == 'periodic'):
            petsc_bc_in_q1 = 'periodic'

        if (self.boundary_conditions.in_q2_bottom == 'periodic'):
            petsc_bc_in_q2 = 'periodic'

        # DMDA is a data structure to handle a distributed structure
        # grid and its related core algorithms. It stores metadata of
        # how the grid is partitioned when run in parallel which is
        # utilized by the various methods of the solver.

        self._da_f = PETSc.DMDA().create(
            [self.N_q1, self.N_q2],
            dof=(self.N_p1 * self.N_p2 * self.N_p3),
            stencil_width=self.N_ghost,
            boundary_type=(petsc_bc_in_q1, petsc_bc_in_q2),
            proc_sizes=(PETSc.DECIDE, PETSc.DECIDE),
            stencil_type=1,
            comm=self._comm)

        # This DA object is used in the communication routines for the
        # EM field quantities. A DOF of 6 is taken so that the communications,
        # and application of B.C's may be carried out in a single call among
        # all the field quantities(E1, E2, E3, B1, B2, B3)
        self._da_fields = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                              dof=6,
                                              stencil_width=self.N_ghost,
                                              boundary_type=(petsc_bc_in_q1,
                                                             petsc_bc_in_q2),
                                              proc_sizes=(PETSc.DECIDE,
                                                          PETSc.DECIDE),
                                              stencil_type=1,
                                              comm=self._comm)

        # Additionally, a DMDA object also needs to be created for
        # the KSP/SNES solver with a DOF of 1. This is used to solve for
        # the electrostatic case:

        self._da_ksp = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                           stencil_width=self.N_ghost,
                                           boundary_type=(petsc_bc_in_q1,
                                                          petsc_bc_in_q2),
                                           proc_sizes=(PETSc.DECIDE,
                                                       PETSc.DECIDE),
                                           stencil_type=1,
                                           comm=self._comm)

        # This DA is used by the FileIO routine dump_moments():
        self._da_dump_moments = PETSc.DMDA().create(
            [self.N_q1, self.N_q2],
            dof=len(self.physical_system.moment_exponents),
            proc_sizes=(PETSc.DECIDE, PETSc.DECIDE),
            comm=self._comm)

        # Creation of the local and global vectors from the DA:
        # This is for the distribution function
        self._glob_f = self._da_f.createGlobalVec()
        self._local_f = self._da_f.createLocalVec()

        # The following global and local vectors are used in
        # the communication routines for EM fields
        self._glob_fields = self._da_fields.createGlobalVec()
        self._local_fields = self._da_fields.createLocalVec()

        # The following vector is used to dump the data to file:
        self._glob_moments = self._da_dump_moments.createGlobalVec()

        # Getting the arrays for the above vectors:
        self._glob_f_array = self._glob_f.getArray()
        self._local_f_array = self._local_f.getArray()

        self._glob_fields_array = self._glob_fields.getArray()
        self._local_fields_array = self._local_fields.getArray()

        self._glob_moments_array = self._glob_moments.getArray()

        # Setting names for the objects which will then be
        # used as the key identifiers for the HDF5 files:
        PETSc.Object.setName(self._glob_f, 'distribution_function')
        PETSc.Object.setName(self._glob_moments, 'moments')

        # Obtaining the array values of the canonical variables:
        self.q1_center, self.q2_center = self._calculate_q_center()
        self.p1, self.p2, self.p3 = self._calculate_p_center()

        # Initialize according to initial condition provided by user:
        self._initialize(physical_system.params)

        # Obtaining start coordinates for the local zone
        # Additionally, we also obtain the size of the local zone
        ((i_q1_start, i_q2_start), (N_q1_local,
                                    N_q2_local)) = self._da_f.getCorners()
        (i_q1_end, i_q2_end) = (i_q1_start + N_q1_local - 1,
                                i_q2_start + N_q2_local - 1)

        # Applying dirichlet boundary conditions:
        if (self.physical_system.boundary_conditions.in_q1_left == 'dirichlet'
            ):
            # If local zone includes the left physical boundary:
            if (i_q1_start == 0):
                self.f[:, :N_g] = self.boundary_conditions.\
                                  f_left(self.f, self.q1_center, self.q2_center,
                                         self.p1, self.p2, self.p3,
                                         self.physical_system.params
                                        )[:, :N_g]

        if (self.physical_system.boundary_conditions.in_q1_right == 'dirichlet'
            ):
            # If local zone includes the right physical boundary:
            if (i_q1_end == self.N_q1 - 1):
                self.f[:, -N_g:] = self.boundary_conditions.\
                                   f_right(self.f, self.q1_center, self.q2_center,
                                           self.p1, self.p2, self.p3,
                                           self.physical_system.params
                                          )[:, -N_g:]

        if (self.physical_system.boundary_conditions.in_q2_bottom ==
                'dirichlet'):
            # If local zone includes the bottom physical boundary:
            if (i_q2_start == 0):
                self.f[:, :, :N_g] = self.boundary_conditions.\
                                     f_bot(self.f, self.q1_center, self.q2_center,
                                           self.p1, self.p2, self.p3,
                                           self.physical_system.params
                                          )[:, :, :N_g]

        if (self.physical_system.boundary_conditions.in_q2_top == 'dirichlet'):
            # If local zone includes the top physical boundary:
            if (i_q2_end == self.N_q2 - 1):
                self.f[:, :, -N_g:] = self.boundary_conditions.\
                                      f_top(self.f, self.q1_center, self.q2_center,
                                            self.p1, self.p2, self.p3,
                                            self.physical_system.params
                                           )[:, :, -N_g:]

        # Assigning the value to the PETSc Vecs(for dump at t = 0):
        (af.flat(self.f)).to_ndarray(self._local_f_array)
        (af.flat(self.f[:, N_g:-N_g, N_g:-N_g])).to_ndarray(self._glob_f_array)

        # Assigning the advection terms along q1 and q2. A_q is evaluated
        # only once, and both components are taken from the same result:
        A_q = physical_system.A_q(self.q1_center, self.q2_center,
                                  self.p1, self.p2, self.p3,
                                  physical_system.params)
        self._A_q1 = A_q[0]
        self._A_q2 = A_q[1]

        # Assigning the conservative advection terms along q1 and q2.
        # As above, C_q is evaluated only once:
        C_q = physical_system.C_q(self.q1_center, self.q2_center,
                                  self.p1, self.p2, self.p3,
                                  physical_system.params)
        self._C_q1 = C_q[0]
        self._C_q2 = C_q[1]

        # Assigning the function objects to methods of the solver:
        self._A_p = physical_system.A_p

        # Source/Sink term:
        self._source = physical_system.source

        # Initializing a variable to track time-elapsed:
        # This becomes necessary when applying shearing wall
        # boundary conditions(WIP):
        self.time_elapsed = 0
예제 #17
0
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np

import arrayfire as af

# ArrayFire backend name and device index that are selected below.
backend = 'opencl'
device = 0

# The backend and device are selected before the dg_maxwell modules are
# imported.
# NOTE(review): presumably the dg_maxwell modules create ArrayFire arrays at
# import time, which would make this ordering significant -- confirm before
# moving the imports to the top of the file.
af.set_backend(backend)
af.set_device(device)

from dg_maxwell import lagrange
from dg_maxwell import isoparam
from dg_maxwell import utils
from dg_maxwell import msh_parser
from dg_maxwell import wave_equation
from dg_maxwell import wave_equation_2d

# The domain of the function.
x_nodes = af.np_to_af_array(np.array([-1., 1.]))

# The number of LGL points into which an element is split.
N_LGL = 6

# Number of elements the domain is to be divided into.
N_Elements = 9

# The scheme to be used for integration. Values are either
# 'gauss_quadrature' or 'lobatto_quadrature'
예제 #18
0
    def __init__(self, physical_system, performance_test_flag=False):
        """
        Constructor for the nonlinear_solver object. It takes the physical
        system object as an argument and uses it in initialization and
        evolution of the system in consideration.

        Additionally, a performance test flag is also passed which when true,
        stores time which is consumed by each of the major solver routines.
        This proves particularly useful in analyzing performance bottlenecks
        and obtaining benchmarks.

        Parameters:
        -----------

        physical_system: The defined physical system object which holds
                         all the simulation information such as the initial
                         conditions, and the domain info is passed as an
                         argument in defining an instance of the
                         nonlinear_solver. This system is then evolved, and
                         monitored using the various methods under the
                         nonlinear_solver class.

        performance_test_flag: When equal to True, the timing accumulators
                               (the self.time_* attributes) are created and
                               set to zero. Defaults to False.

        Raises:
        -------

        ValueError: If 'periodic' or 'shearing-box' boundary conditions are
                    set on the left/bottom boundary of an axis but not on
                    the opposite boundary of the same axis.
        """
        self.physical_system = physical_system

        # Holding Domain Info:
        self.q1_start, self.q1_end = physical_system.q1_start,\
                                     physical_system.q1_end
        self.q2_start, self.q2_end = physical_system.q2_start,\
                                     physical_system.q2_end
        self.p1_start, self.p1_end = physical_system.p1_start,\
                                     physical_system.p1_end
        self.p2_start, self.p2_end = physical_system.p2_start,\
                                     physical_system.p2_end
        self.p3_start, self.p3_end = physical_system.p3_start,\
                                     physical_system.p3_end

        # Holding Domain Resolution:
        self.N_q1, self.dq1 = physical_system.N_q1, physical_system.dq1
        self.N_q2, self.dq2 = physical_system.N_q2, physical_system.dq2
        self.N_p1, self.dp1 = physical_system.N_p1, physical_system.dp1
        self.N_p2, self.dp2 = physical_system.N_p2, physical_system.dp2
        self.N_p3, self.dp3 = physical_system.N_p3, physical_system.dp3

        # Getting number of ghost zones, and the boundary
        # conditions that are utilized:
        N_g_q = self.N_ghost_q = physical_system.N_ghost_q
        N_g_p = self.N_ghost_p = physical_system.N_ghost_p

        self.boundary_conditions = physical_system.boundary_conditions

        # Declaring the communicator:
        self._comm = PETSc.COMM_WORLD.tompi4py()
        # When several devices are available, the ranks are distributed
        # over them in a round-robin manner:
        if (self.physical_system.params.num_devices > 1):
            af.set_device(self._comm.rank %
                          self.physical_system.params.num_devices)

        # Getting number of species:
        self.N_species = len(physical_system.params.mass)

        # Having the mass and charge along axis 1:
        # NOTE: this overwrites params.mass and params.charge on the
        # physical_system object that was passed in.
        self.physical_system.params.mass  = \
            af.cast(af.moddims(af.to_array(physical_system.params.mass),
                               1, self.N_species
                              ),
                    af.Dtype.f64
                   )

        self.physical_system.params.charge  = \
            af.cast(af.moddims(af.to_array(physical_system.params.charge),
                               1, self.N_species
                              ),
                    af.Dtype.f64
                   )

        PETSc.Sys.Print('\nBackend Details for Nonlinear Solver:')

        # Printing the backend details for each rank/device/node:
        PETSc.Sys.syncPrint(
            indent('Rank ' + str(self._comm.rank) + ' of ' +
                   str(self._comm.size - 1)))
        PETSc.Sys.syncPrint(indent('On Node: ' + socket.gethostname()))
        PETSc.Sys.syncPrint(indent('Device Details:'))
        PETSc.Sys.syncPrint(indent(af.info_str(), 2))
        PETSc.Sys.syncPrint(
            indent('Device Bandwidth = ' + str(bandwidth_test(100)) +
                   ' GB / sec'))
        PETSc.Sys.syncPrint()
        PETSc.Sys.syncFlush()

        self.performance_test_flag = performance_test_flag

        # Initializing variables which are used to time the components of the solver:
        if (performance_test_flag == True):

            self.time_ts = 0

            self.time_interp2 = 0
            self.time_sourcets = 0

            self.time_fvm_solver = 0
            self.time_reconstruct = 0
            self.time_riemann = 0

            self.time_fieldstep = 0
            self.time_interp3 = 0

            self.time_apply_bcs_f = 0
            self.time_communicate_f = 0

        petsc_bc_in_q1 = 'ghosted'
        petsc_bc_in_q2 = 'ghosted'

        # Only for periodic boundary conditions or shearing-box boundary conditions
        # do the boundary conditions passed to the DA need to be changed. PETSc
        # automatically handles the application of periodic boundary conditions when
        # running in parallel. For shearing box boundary conditions, an interpolation
        # operation needs to be applied on top of the periodic boundary conditions.
        # In all other cases, ghosted boundaries are used.

        if (self.boundary_conditions.in_q1_left == 'periodic'
                or self.boundary_conditions.in_q1_left == 'shearing-box'):
            petsc_bc_in_q1 = 'periodic'

        if (self.boundary_conditions.in_q2_bottom == 'periodic'
                or self.boundary_conditions.in_q2_bottom == 'shearing-box'):
            petsc_bc_in_q2 = 'periodic'

        # Periodic and shearing-box boundary conditions have to be set on
        # both boundaries of an axis. Explicit checks are used rather than
        # assert statements, since asserts are removed when Python is run
        # with -O, which would silently disable this validation.
        bcs = self.boundary_conditions
        for bc_near, bc_far in ((bcs.in_q1_left, bcs.in_q1_right),
                                (bcs.in_q2_bottom, bcs.in_q2_top)):
            if (bc_near == 'periodic' and bc_far != 'periodic'):
                raise ValueError(
                    'Periodic boundary conditions need to be applied to '
                    'both the boundaries of a particular axis')

            if (bc_near == 'shearing-box' and bc_far != 'shearing-box'):
                raise ValueError(
                    'Shearing box boundary conditions need to be applied to '
                    'both the boundaries of a particular axis')

        nproc_in_q1 = PETSc.DECIDE
        nproc_in_q2 = PETSc.DECIDE

        # Since shearing boundary conditions require interpolations which are non-local:
        if (self.boundary_conditions.in_q2_bottom == 'shearing-box'):
            nproc_in_q1 = 1

        if (self.boundary_conditions.in_q1_left == 'shearing-box'):
            nproc_in_q2 = 1

        # DMDA is a data structure to handle a distributed structure
        # grid and its related core algorithms. It stores metadata of
        # how the grid is partitioned when run in parallel which is
        # utilized by the various methods of the solver.
        self._da_f = PETSc.DMDA().create(
            [self.N_q1, self.N_q2],
            dof=(self.N_species * (self.N_p1 + 2 * N_g_p) *
                 (self.N_p2 + 2 * N_g_p) * (self.N_p3 + 2 * N_g_p)),
            stencil_width=N_g_q,
            boundary_type=(petsc_bc_in_q1, petsc_bc_in_q2),
            proc_sizes=(nproc_in_q1, nproc_in_q2),
            stencil_type=1,
            comm=self._comm)

        # This DA is used by the FileIO routine dump_distribution_function():
        self._da_dump_f = PETSc.DMDA().create(
            [self.N_q1, self.N_q2],
            dof=(self.N_species * self.N_p1 * self.N_p2 * self.N_p3),
            stencil_width=N_g_q,
            boundary_type=(petsc_bc_in_q1, petsc_bc_in_q2),
            proc_sizes=(nproc_in_q1, nproc_in_q2),
            stencil_type=1,
            comm=self._comm)

        # This DA is used by the FileIO routine dump_moments():
        # Finding the number of definitions for the moments:
        attributes = [
            a for a in dir(self.physical_system.moments)
            if not a.startswith('_')
        ]

        # Removing utility functions:
        if ('integral_over_v' in attributes):
            attributes.remove('integral_over_v')

        self._da_dump_moments = PETSc.DMDA().create(
            [self.N_q1, self.N_q2],
            dof=self.N_species * len(attributes),
            proc_sizes=(nproc_in_q1, nproc_in_q2),
            comm=self._comm)

        # Creation of the local and global vectors from the DA:
        # This is for the distribution function
        self._glob_f = self._da_f.createGlobalVec()
        self._local_f = self._da_f.createLocalVec()

        # The following vector is used to dump the data to file:
        self._glob_dump_f = self._da_dump_f.createGlobalVec()
        self._glob_moments = self._da_dump_moments.createGlobalVec()

        # Getting the arrays for the above vectors:
        self._glob_f_array = self._glob_f.getArray()
        self._local_f_array = self._local_f.getArray()

        self._glob_moments_array = self._glob_moments.getArray()
        self._glob_dump_f_array = self._glob_dump_f.getArray()

        # Setting names for the objects which will then be
        # used as the key identifiers for the HDF5 files:
        PETSc.Object.setName(self._glob_dump_f, 'distribution_function')
        PETSc.Object.setName(self._glob_moments, 'moments')

        # Obtaining the array values of the canonical variables:
        self.q1_center, self.q2_center = self._calculate_q_center()
        self.p1_center, self.p2_center, self.p3_center = self._calculate_p_center(
        )

        # Initialize according to initial condition provided by user:
        self._initialize(physical_system.params)

        # Obtaining start coordinates for the local zone
        # Additionally, we also obtain the size of the local zone
        ((i_q1_start, i_q2_start), (N_q1_local,
                                    N_q2_local)) = self._da_f.getCorners()
        (i_q1_end, i_q2_end) = (i_q1_start + N_q1_local - 1,
                                i_q2_start + N_q2_local - 1)

        # Applying dirichlet boundary conditions:
        if (self.physical_system.boundary_conditions.in_q1_left == 'dirichlet'
            ):
            # If local zone includes the left physical boundary:
            if (i_q1_start == 0):
                self.f[:, :N_g_q] = self.boundary_conditions.\
                                    f_left(self.f, self.q1_center, self.q2_center,
                                           self.p1_center, self.p2_center, self.p3_center,
                                           self.physical_system.params
                                          )[:, :N_g_q]

        if (self.physical_system.boundary_conditions.in_q1_right == 'dirichlet'
            ):
            # If local zone includes the right physical boundary:
            if (i_q1_end == self.N_q1 - 1):
                self.f[:, -N_g_q:] = self.boundary_conditions.\
                                     f_right(self.f, self.q1_center, self.q2_center,
                                             self.p1_center, self.p2_center, self.p3_center,
                                             self.physical_system.params
                                            )[:, -N_g_q:]

        if (self.physical_system.boundary_conditions.in_q2_bottom ==
                'dirichlet'):
            # If local zone includes the bottom physical boundary:
            if (i_q2_start == 0):
                self.f[:, :, :N_g_q] = self.boundary_conditions.\
                                       f_bot(self.f, self.q1_center, self.q2_center,
                                             self.p1_center, self.p2_center, self.p3_center,
                                             self.physical_system.params
                                            )[:, :, :N_g_q]

        if (self.physical_system.boundary_conditions.in_q2_top == 'dirichlet'):
            # If local zone includes the top physical boundary:
            if (i_q2_end == self.N_q2 - 1):
                self.f[:, :, -N_g_q:] = self.boundary_conditions.\
                                        f_top(self.f, self.q1_center, self.q2_center,
                                              self.p1_center, self.p2_center, self.p3_center,
                                              self.physical_system.params
                                             )[:, :, -N_g_q:]

        # Assigning the value to the PETSc Vecs(for dump at t = 0):
        (af.flat(self.f)).to_ndarray(self._local_f_array)
        (af.flat(self.f[:, :, N_g_q:-N_g_q,
                        N_g_q:-N_g_q])).to_ndarray(self._glob_f_array)

        # Assigning the function objects to methods of the solver:
        self._A_q = physical_system.A_q
        self._C_q = physical_system.C_q
        self._A_p = physical_system.A_p
        self._C_p = physical_system.C_p

        # Source/Sink term:
        self._source = physical_system.source

        # Initializing a variable to track time-elapsed:
        self.time_elapsed = 0
예제 #19
0
def gpuMBIR(tomo,angles,center,input_params):
    """
    MBIR reconstruction using GPU based gridding operators.

    Slices are processed in pairs: slice 2k is stored as the real part and
    slice 2k+1 as the imaginary part of one complex image.
    NOTE(review): this pairing presumably requires an even number of
    slices -- confirm with callers.

    Inputs:
    tomo : 3D numpy sinogram array with dimensions same as tomopy
    angles : Array of angles in radians
    center : Floating point center of rotation
    input_params : A dictionary with the keys
        'gpu_device' : Device id of the gpu (For a 4 GPU cluster ; 0-3)
        'oversamp_factor': A factor by which to pad the image/data for FFT
        'num_iter' : Max number of MBIR iterations
        'smoothness' : Regularization constant
        'p': MRF shape param

    Returns:
    float32 numpy array of shape (num_slice, im_size, im_size) holding the
    reconstruction, where num_slice and im_size are taken from tomo after
    it is transposed to (slice, columns, angles).
    """
    print('Starting GPU MBIR recon')
    af.set_device(input_params['gpu_device'])  # Set the device number for gpu based code

    # Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  # slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    num_iter = input_params['num_iter']
    mrf_sigma = input_params['smoothness']
    mrf_p = input_params['p']
    print('MRF params p=%f sigma=%f' % (mrf_p, mrf_sigma))

    # Number of complex images. Floor division is used here and below
    # because these values are array dimensions, loop bounds and slice
    # bounds, which must be integers; `/` yields a float under Python 3.
    num_pairs = num_slice // 2

    # Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  # Sinogram size after padding
    sino['Ns_orig'] = im_size  # size of original sinogram
    half_pad = sino['Ns'] // 2
    half_orig = sino['Ns_orig'] // 2
    sino['center'] = center + (half_pad - half_orig)  # for padded sinogram
    sino['angles'] = angles

    # Initialize NUFFT parameters
    print('Initialize NUFFT params')
    nufft_params = init_nufft_params(sino, geom)

    temp_y = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    temp_x = afnp.zeros((sino['Ns'], sino['Ns']), dtype=afnp.complex64)
    x_recon = afnp.zeros((num_pairs, sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)

    # Index range of the original image inside the padded one
    pad_idx = slice(half_pad - half_orig, half_pad + half_orig)

    # allocate output array
    rec_mbir_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']), dtype=np.float32)

    # Move all data to GPU
    print('Moving data to GPU')
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2], dtype=afnp.complex64)
    # temp array to store the derivative of cost func
    gradient = afnp.zeros((num_pairs, sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)
    # Nesterov method variables
    z_recon = afnp.zeros((num_pairs, sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)
    t_nes = 1

    # Compute Lipschitz of gradient
    print('Computing Lipschitz of gradient')
    x_ones = afnp.ones((1, sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)
    temp_x[pad_idx, pad_idx] = x_ones[0]
    temp_proj = forward_project(temp_x, nufft_params)
    temp_backproj = (back_project(temp_proj, nufft_params))[pad_idx, pad_idx]
    print('Adding Hessian of regularizer')
    temp_backproj2 = afnp.zeros((1, sino['Ns_orig'], sino['Ns_orig']), dtype=afnp.complex64)
    temp_backproj2[0] = temp_backproj
    add_hessian(mrf_sigma, x_ones, temp_backproj2)
    L = np.max([temp_backproj2.real.max(), temp_backproj2.imag.max()])
    print('Lipschitz constant = %f' % (L))
    del x_ones, temp_proj, temp_backproj, temp_backproj2

    for iter_num in range(num_iter):
        print('Iteration %d of %d' % (iter_num, num_iter))
        # Derivative of the data fitting term, one complex image at a time
        for i in range(num_pairs):
            temp_x[pad_idx, pad_idx] = x_recon[i]
            Ax = forward_project(temp_x, nufft_params)
            temp_y[pad_idx] = gdata[i]
            gradient[i] = (back_project((Ax - temp_y), nufft_params))[pad_idx, pad_idx]
        # Derivative of regularization term
        tvd_update(mrf_p, mrf_sigma, x_recon, gradient)
        # Nesterov update (a plain gradient step would be
        # x_recon -= gradient/L)
        x_recon, z_recon, t_nes = nesterovOGM2update(x_recon, z_recon, t_nes, gradient, L)

    # Move to CPU and unpack the real/imaginary parts back into the
    # interleaved slice order of the input
    rec_mbir = np.array(x_recon, dtype=np.complex64)
    rec_mbir_final[slice_1] = np.array(rec_mbir.real, dtype=np.float32)
    rec_mbir_final[slice_2] = np.array(rec_mbir.imag, dtype=np.float32)
    return rec_mbir_final
예제 #20
0
def gpuSIRT(tomo, angles, center, input_params):
    """Run a SIRT reconstruction on the GPU using NUFFT-based projectors.

    Two slices are processed at a time: even-indexed slices are placed in
    the real part and odd-indexed slices in the imaginary part of a single
    complex array, and the result is split back the same way at the end.

    Parameters
    ----------
    tomo : numpy.ndarray
        Projection data. It is transposed with axes ``(1, 2, 0)`` to obtain
        a (slice, column, angle) layout, so the input is expected as
        (angle, slice, column) -- presumably the tomopy convention.
    angles : array-like
        Projection angles; passed unchanged to ``init_nufft_params``.
    center : number
        Centre of rotation for the un-padded sinogram. It is shifted by the
        padding offset before being handed to ``init_nufft_params``.
    input_params : dict
        Must provide ``'gpu_device'``, ``'oversamp_factor'`` and
        ``'num_iter'``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(num_slice, im_size, im_size)``.

    Notes
    -----
    NOTE(review): an odd number of slices makes the even/odd slice groups
    differ in length, so the complex packing below would fail -- confirm
    callers always pass an even slice count.
    """
    print('Starting GPU SIRT recon')
    # Set the device number for gpu based code
    af.set_device(input_params['gpu_device'])

    # Change tomopy format
    new_tomo = np.transpose(tomo, (1, 2, 0))  # slice, columns, angles
    im_size = new_tomo.shape[1]
    num_slice = new_tomo.shape[0]
    num_angles = new_tomo.shape[2]
    pad_size = np.int16(im_size * input_params['oversamp_factor'])
    nufft_scaling = (np.pi / pad_size)**2
    num_iter = input_params['num_iter']

    # Floor division keeps every size/offset an integer on Python 3 as well;
    # true division would yield floats that range()/slice() reject.
    num_pairs = num_slice // 2

    # Initialize structures for NUFFT
    sino = {}
    geom = {}
    sino['Ns'] = pad_size  # Sinogram size after padding
    sino['Ns_orig'] = im_size  # size of original sinogram
    half_pad = sino['Ns'] // 2
    half_orig = sino['Ns_orig'] // 2
    sino['center'] = center + (half_pad - half_orig)  # for padded sinogram
    sino['angles'] = angles

    # Initialize NUFFT parameters
    nufft_params = init_nufft_params(sino, geom)
    temp_y = afnp.zeros((sino['Ns'], num_angles), dtype=afnp.complex64)
    temp_x = afnp.zeros((sino['Ns'], sino['Ns']), dtype=afnp.complex64)
    x_recon = afnp.zeros((num_pairs, sino['Ns_orig'], sino['Ns_orig']),
                         dtype=afnp.complex64)
    # Index range of the original (un-padded) data inside the padded arrays
    pad_idx = slice(half_pad - half_orig, half_pad + half_orig)

    # allocate output array
    rec_sirt_final = np.zeros((num_slice, sino['Ns_orig'], sino['Ns_orig']),
                              dtype=np.float32)

    # Pre-compute diagonal scaling matrices; one the same size as the image
    # and the other the same size as the data.
    # R: reciprocal magnitude of the forward projection of an all-ones image
    x_ones = afnp.ones((sino['Ns_orig'], sino['Ns_orig']),
                       dtype=afnp.complex64)
    temp_x[pad_idx, pad_idx] = x_ones
    temp_proj = forward_project(temp_x,
                                nufft_params) * (sino['Ns'] * afnp.pi / 2)
    R = 1 / afnp.abs(temp_proj)
    # Zero out entries produced by dividing by a zero projection
    R[afnp.isnan(R)] = 0
    R[afnp.isinf(R)] = 0
    R = afnp.array(R, dtype=afnp.complex64)

    # C: reciprocal magnitude of the back projection of an all-ones sinogram
    y_ones = afnp.ones((sino['Ns_orig'], num_angles), dtype=afnp.complex64)
    temp_y[pad_idx] = y_ones
    temp_backproj = back_project(temp_y, nufft_params) * nufft_scaling / 2
    C = 1 / (afnp.abs(temp_backproj))
    C[afnp.isnan(C)] = 0
    C[afnp.isinf(C)] = 0
    C = afnp.array(C, dtype=afnp.complex64)

    # Move all data to GPU, packing even slices as real and odd as imaginary
    slice_1 = slice(0, num_slice, 2)
    slice_2 = slice(1, num_slice, 2)
    gdata = afnp.array(new_tomo[slice_1] + 1j * new_tomo[slice_2],
                       dtype=afnp.complex64)

    # loop over all slice pairs
    for i in range(num_pairs):
        for iter_num in range(num_iter):
            # SIRT update: x <- x + C * A^T( R * (y - A x) )
            temp_x[pad_idx, pad_idx] = x_recon[i]
            Ax = (np.pi / 2) * sino['Ns'] * forward_project(
                temp_x, nufft_params)
            temp_y[pad_idx] = gdata[i]
            x_recon[i] = x_recon[i] + (
                C * back_project(R * (temp_y - Ax), nufft_params) *
                nufft_scaling / 2)[pad_idx, pad_idx]

    # Move to CPU and unpack real/imaginary parts into even/odd slices
    rec_sirt = np.array(x_recon, dtype=np.complex64)
    rec_sirt_final[slice_1] = np.array(rec_sirt.real, dtype=np.float32)
    rec_sirt_final[slice_2] = np.array(rec_sirt.imag, dtype=np.float32)
    return rec_sirt_final
예제 #21
0
    out.arr = ctypes.c_void_p(af_array_ptr)
    print("Converting from ", hex(af_array_ptr))
    # print("New array has device pointer ",hex(get_gpu_pointer(out)))
    return out


def get_use_count(arr):
    """Ask the active ArrayFire backend for the data reference count of ``arr``.

    The count is written by ``af_get_data_ref_count`` into a ``ctypes.c_int``,
    and that ctypes object (not a plain Python int) is what gets returned.
    ``af.safe_call`` is applied to the backend call's status.
    """
    ref_count = ctypes.c_int(0)
    backend = af.backend.get()
    status = backend.af_get_data_ref_count(af.c_pointer(ref_count), arr.arr)
    af.safe_call(status)
    return ref_count


# Script setup: pick the CUDA backend, build two small NumPy matrices and
# copy them to ArrayFire as float32 arrays.

# use gpu backend
af.set_backend('cuda')
af.set_device(0)  # select the gpu to use
af.info()

# 4x4 Vandermonde matrix built from the vector [1, 2, 3, 5]
van = np.array([1, 2, 3, 5])
van = np.vander(van)
# 4x4 matrix filled with the value 2
two = np.ones((4, 4)) * 2

print("Define stuff", flush=True)
# Convert the NumPy arrays to ArrayFire arrays and cast them to f32
afvan = af.interop.from_ndarray(van)
afvan = afvan.as_type(af.Dtype.f32)

# NOTE(review): despite the name, `afthr` holds the matrix of twos
afthr = af.interop.from_ndarray(two)
afthr = afthr.as_type(af.Dtype.f32)

# Print device memory usage, labelled "before loops"
af.device.print_mem_info("before loops")
예제 #22
0
from dg_maxwell import params
from dg_maxwell import wave_equation

import arrayfire as af

# Select the ArrayFire CPU backend and its device 0 at import time.
af.set_backend('cpu')
af.set_device(0)

def L1_norm(u):
    '''

    Calculate the L1 norm of the error from the polynomial obtained by
    Lagrange interpolation of ``u``.

    The interpolation coefficients of ``u`` are reordered with axes
    ``(2, 1, 0)``, integrated with ``lagrange.integrate`` and the
    integrals are summed.

    NOTE(review): no absolute value is taken in this function; presumably
    the helpers or the caller account for the sign -- confirm.

    Parameters
    ----------
    u : arrayfire.Array [N_LGL N_Elements 1 1]
        Difference between analytical and numerical u at the mapped LGL points.

    Returns
    -------
    float64
        The L1 norm of error, i.e. the result of ``af.sum`` over the
        integrated interpolation polynomials.

    '''
    coeffs = lagrange.lagrange_interpolation_u(u)
    reordered_coeffs = af.reorder(coeffs, 2, 1, 0)
    integrals = lagrange.integrate(reordered_coeffs)

    return af.sum(integrals)
예제 #23
0
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
########################################################

import arrayfire as af
import sys
import os

if __name__ == "__main__":
    # Usage: <script> IMAGE_PATH [DEVICE_ID]
    # Shows the input image and its 256-bin histogram in two windows.

    if (len(sys.argv) == 1):
        raise RuntimeError("Expected to the image as the first argument")

    if not os.path.isfile(sys.argv[1]):
        raise RuntimeError("File %s not found" % sys.argv[1])

    # Optional second argument selects the ArrayFire device
    if (len(sys.argv) >  2):
        af.set_device(int(sys.argv[2]))

    af.info()

    hist_win = af.Window(512, 512, "3D Plot example using ArrayFire")
    img_win  = af.Window(480, 640, "Input Image")

    # Load the image and cast it to unsigned 8-bit before building the
    # histogram over the value range [0, 255].
    img = af.load_image(sys.argv[1]).as_type(af.Dtype.u8)
    hist = af.histogram(img, 256, 0, 255)

    # Keep redrawing until either window is closed
    while (not hist_win.close()) and (not img_win.close()):
        hist_win.hist(hist, 0, 255)
        img_win.image(img)
    return af.mean(payoff) * math.exp(-r * t)

def monte_carlo_simulate(N, use_barrier, num_iter=10):
    """Time ``monte_carlo_options`` and return the mean duration per call.

    The pricing routine is invoked ``num_iter`` times with ``N`` paths and a
    fixed set of option parameters; ``use_barrier`` is forwarded unchanged.
    The result is the elapsed ``time()`` difference divided by ``num_iter``
    (seconds, assuming ``time`` is ``time.time``).
    """
    # Fixed option parameters used for every timed run
    spot, expiry, sigma, risk_free, strike_price = 100.0, 0.5, 0.3, 0.01, 100
    n_steps, barrier_level = 180, 115.0

    t0 = time()
    for _ in range(num_iter):
        monte_carlo_options(N, spot, expiry, sigma, risk_free, strike_price,
                            n_steps, use_barrier, barrier_level)
    elapsed = time() - t0

    return elapsed / num_iter

if __name__ == "__main__":
    # Optional first argument selects the ArrayFire device
    if len(sys.argv) > 1:
        af.set_device(int(sys.argv[1]))
    af.info()

    # Run both variants once with discarded results, then synchronise
    # (presumably a warm-up before the timed runs).
    monte_carlo_simulate(1000, use_barrier=False)
    monte_carlo_simulate(1000, use_barrier=True)
    af.sync()

    # Report per-call timings for 10k..100k paths, 100 iterations each
    for n in range(10000, 100001, 10000):
        vanilla_ms = 1000 * monte_carlo_simulate(n, False, 100)
        barrier_ms = 1000 * monte_carlo_simulate(n, True, 100)
        print("Time for %7d paths - vanilla method: %4.3f ms, barrier method: % 4.3f ms\n" %
              (n, vanilla_ms, barrier_ms))
#######################################################
# Copyright (c) 2015, ArrayFire
# All rights reserved.
#
# This file is distributed under 3-clause BSD license.
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
########################################################

import arrayfire as af

# Print general device information, then visit every device and check that
# allocating one array results in exactly one allocated and one locked buffer.
af.info()
print(af.device_info())
print(af.get_device_count())
print(af.is_dbl_supported())
af.sync()

print('starting the loop')
for device_id in range(af.get_device_count()):
    af.set_device(device_id)
    active = af.get_device()
    assert device_id == active

    print(af.is_dbl_supported(device_id))

    # Keep a reference so the buffer is still held when memory is queried
    sample = af.randu(100, 100)
    af.sync(active)
    usage = af.device_mem_info()
    for pool in ('alloc', 'lock'):
        assert usage[pool]['buffers'] == 1
예제 #26
0
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
from scipy import special as sp
import arrayfire as af

from dg_maxwell import utils
from dg_maxwell import params

# Select the ArrayFire backend and device configured in dg_maxwell.params.
af.set_backend(params.backend)
af.set_device(params.device)


def LGL_points(N):
    '''
    Calculates : math: `N` Legendre-Gauss-Lobatto (LGL) points.
    LGL points are the roots of the polynomial

    :math: `(1 - \\xi ** 2) P_{n - 1}'(\\xi) = 0`

    Where :math: `P_{n}(\\xi)` are the Legendre polynomials.
    This function finds the roots of the above polynomial.

    Parameters
    ----------

    N : int
        Number of LGL nodes required

    Returns
예제 #27
0
#######################################################
# Copyright (c) 2015, ArrayFire
# All rights reserved.
#
# This file is distributed under 3-clause BSD license.
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
########################################################

import arrayfire as af

# Device smoke test: report device details, then for each device allocate a
# single random array and verify the memory manager counts one buffer.
af.info()
print(af.device_info())
print(af.get_device_count())
print(af.is_dbl_supported())
af.sync()

print('starting the loop')
for idx in range(af.get_device_count()):
    af.set_device(idx)
    current = af.get_device()
    assert idx == current

    print(af.is_dbl_supported(idx))

    # The array must stay referenced while the memory info is read
    held = af.randu(100, 100)
    af.sync(current)
    info = af.device_mem_info()
    alloc_stats = info['alloc']
    assert alloc_stats['buffers'] == 1
    lock_stats = info['lock']
    assert lock_stats['buffers'] == 1