Exemple #1
    def __init__(self, slvr_cfg):
        # NOTE(review): this listing looks like a truncated extract. The
        # docstring delimiters and several statement openers/closers are
        # missing, so the method does not parse as shown. Restore the dropped
        # lines from the upstream source before relying on it.
        #
        # Purpose (from the visible statements): set up thread-local storage,
        # create solver arrays, spin up one enqueue and one sync executor per
        # device context, choose a memory budget via C._thread_budget and then
        # create, register and prime sub-solvers on the enqueue executors.
        #
        # The next lines read like docstring text whose quotes were dropped.
        RimeSolver Constructor

            slvr_cfg : SolverConfiguration
                Solver Configuration variables
        super(CompositeRimeSolver, self).__init__(slvr_cfg=slvr_cfg)

        # Create thread local storage
        self.thread_local = threading.local()


        # Configure the dimensions of the beam cube
        # NOTE(review): each of the three fragments below is only the trailing
        # keyword argument of a call whose opening line is not shown.
                                description='E cube l width')

                                description='E cube m height')

                                description='E cube nu depth')

        # Monkey patch v4 antenna pair functions into the object
        # NOTE(review): the function is imported but never called in the lines
        # shown; the call was presumably dropped from the extract.
        from montblanc.impl.rime.v4.ant_pairs import monkey_patch_antenna_pairs

        # Copy the v4 arrays and properties and
        # modify them for use on this Composite Solver
        # NOTE(review): A_main and P_main are not used again in the visible
        # lines; their registration is presumably among the dropped lines.
        A_main, P_main = self._cfg_comp_slvr_arys_and_props(v4Arrays, v4Props)


        # Look for ignored and supplied arrays in the solver configuration
        array_cfg = slvr_cfg.get('array_cfg', {})
        ignore = array_cfg.get('ignore', None)
        supplied = array_cfg.get('supplied', None)

        # Create arrays on the solver, ignoring
        # and using supplied arrays as necessary
        self.create_arrays(ignore, supplied)

        # PyCUDA contexts for each GPU device
        self.dev_ctxs = slvr_cfg.get(Options.CONTEXT)
        # Number of GPU Solvers created for each device
        nsolvers = slvr_cfg.get(Options.NSOLVERS)
        # Maximum number of enqueued visibility chunks
        # before throttling is applied
        self.throttle_factor = slvr_cfg.get(Options.VISIBILITY_THROTTLE_FACTOR)

        # Massage the contexts for each device into a list
        if not isinstance(self.dev_ctxs, list):
            self.dev_ctxs = [self.dev_ctxs]

        # NOTE(review): tail of a call (presumably a log statement) whose
        # opening line is missing.
            'Using {d} solver(s) per device.'.format(d=nsolvers))

        # Shorten the type name
        C = CompositeRimeSolver

        # Create a one thread executor for each device context,
        # i.e. a thread per device
        # Two independent sets are built: one for enqueueing, one for syncing.
        # `cf` is presumably concurrent.futures -- confirm against the imports.
        enqueue_executors = [cf.ThreadPoolExecutor(1) for ctx in self.dev_ctxs]
        sync_executors = [cf.ThreadPoolExecutor(1) for ctx in self.dev_ctxs]

        self.enqueue_executors = enqueue_executors
        self.sync_executors = sync_executors
        self.initialised = False
        self._vis_write_mode = slvr_cfg.get(Options.VISIBILITY_WRITE_MODE)

        # NOTE(review): tail of a call whose opening line is missing.
            'Created {d} executor(s).'.format(d=len(enqueue_executors)))

        # Initialise executor threads
        # Calling .result() on each submitted task waits for that thread's
        # initialisation to finish before moving on to the next executor.
        for ex, ctx in zip(enqueue_executors, self.dev_ctxs):
            ex.submit(C._thread_init, self, ctx).result()

        for ex, ctx in zip(sync_executors, self.dev_ctxs):
            ex.submit(C._thread_init, self, ctx).result()

        # NOTE(review): tail of a call whose opening line is missing.
            'Initialised {d} thread(s).'.format(d=len(enqueue_executors)))

        # Get a template dictionary
        T = self.template_dict()

        A_sub, P_sub = self._cfg_sub_slvr_arys_and_props(v4Arrays, v4Props)

        # Find the budget with the lowest memory usage
        # Work with the device with the lowest memory
        # NOTE(review): the list literal is not closed before `key=` in this
        # listing. The key sorts on element 1 of each result, while the first
        # result is unpacked below as (P, M, mem); if the intent is to order by
        # memory, index 2 may be what is meant -- confirm.
        budgets = sorted([
            ex.submit(C._thread_budget, self, slvr_cfg, A_sub, T).result()
            for ex in enqueue_executors
                         key=lambda T: T[1])

        P, M, mem = budgets[0]

        # Log some information about the memory budget
        # and dimension reduction
        # NOTE(review): the format() call below is cut off after its first
        # keyword argument.
        montblanc.log.info(('Selected a solver memory budget of {b} '
                            'for {d} solvers.').format(b=mbu.fmt_bytes(mem),

        montblanc.log.info(('The following dimension reductions '
                            'have been applied:'))

        # NOTE(review): the format() call in this loop is cut off after `p=`.
        for k, v in M.iteritems():
            montblanc.log.info('{p}{d}: {id} => {rd}'.format(p=' ' * 4,

        # Create the sub solver configuration
        subslvr_cfg = slvr_cfg.copy()
        subslvr_cfg[Options.DATA_SOURCE] = Options.DATA_SOURCE_EMPTY
        # NOTE(review): `ctx` is not assigned here; it is whatever value was
        # left bound by the loops above (the last device context iterated).
        subslvr_cfg[Options.CONTEXT] = ctx

        subslvr_cfg = self._cfg_subslvr_dims(subslvr_cfg, P)

        # Extract the dimension differences
        # Each attribute is read from P under the matching Options key.
        self.src_diff = P[Options.NSRC]
        self.time_diff = P[Options.NTIME]
        self.ant_diff = P[Options.NA]
        self.bl_diff = P[Options.NBL]
        self.chan_diff = P[Options.NCHAN]

        montblanc.log.info('Creating {s} solver(s) on {d} device(s).'.format(
            s=nsolvers, d=len(enqueue_executors)))

        # Now create the solvers on each thread
        # NOTE(review): the submit() call is cut off after `P,`.
        for ex in enqueue_executors:
            ex.submit(C._thread_create_solvers, self, subslvr_cfg, P,

        montblanc.log.info('Solvers Created')

        # Register arrays and properties on each thread's solvers
        # NOTE(review): the submit() call is cut off after `A_sub,`.
        for ex in enqueue_executors:
            ex.submit(C._thread_reg_sub_arys_and_props, self, A_sub,

        montblanc.log.info('Priming Memory Pools')

        # Prime the memory pools on each sub-solver
        for ex in enqueue_executors:
            ex.submit(C._thread_prime_memory_pools, self).result()
Exemple #2
def _budget(cube, slvr_cfg):
    """
    Reduce the extents of the cube's dimensions until the memory it
    requires fits within a budget.

    Candidate reductions are tried in a fixed order: the first time
    candidate, then the source batch sizes, then the remaining time
    candidates and finally the baseline candidates. Each candidate is
    applied with ``cube.update_dimension`` while ``cube.bytes_required()``
    still exceeds the budget. The search stops as soon as the budget is met.

    Parameters
    ----------
    cube : object
        Must provide ``bytes_required()``, ``dim_global_size(*names)`` and
        ``update_dimension(name, lower_extent=..., upper_extent=...)``.
        It is modified in place.
    slvr_cfg : dict
        Solver configuration. ``'mem_budget'`` is optional and defaults to
        ``2*ONE_GB``. ``'source_batch_size'`` is read once the source
        reductions are reached.

    Returns
    -------
    tuple
        ``(applied_reductions, bytes_required)`` where ``applied_reductions``
        maps each reduced dimension name to the last size applied to it and
        ``bytes_required`` is the cube's requirement after the reductions.
        The requirement may still exceed the budget if every candidate was
        exhausted.
    """
    # Figure out a viable dimension configuration
    # given the total problem size
    mem_budget = slvr_cfg.get('mem_budget', 2*ONE_GB)
    bytes_required = cube.bytes_required()

    src_dims = mbu.source_nr_vars() + ['nsrc']
    dim_names = ['na', 'nbl', 'ntime'] + src_dims
    global_sizes = cube.dim_global_size(*dim_names)
    na, nbl, ntime = global_sizes[:3]

    # Keep track of original dimension sizes and any reductions that are applied
    original_sizes = dict(zip(dim_names, global_sizes))
    applied_reductions = {}

    def _reduction():
        """ Yield lists of (dimension, size) pairs, in the order to try them """
        # Reduce over time first
        trange = _uniq_log2_range(1, ntime, 5)
        for t in trange[0:1]:
            yield [('ntime', t)]

        # Attempt reduction over source
        sbs = slvr_cfg['source_batch_size']
        # The fallback must be iterable: a bare int here made the loop
        # below raise TypeError whenever the batch size was 10 or less.
        srange = _uniq_log2_range(10, sbs, 5) if sbs > 10 else [10]
        src_dim_gs = global_sizes[3:]

        for bs in srange:
            # Never ask for more sources than the dimension actually holds
            yield [(d, bs if bs < gs else gs) for d, gs
                in zip(src_dims, src_dim_gs)]

        # Try the rest of the timesteps
        for t in trange[1:]:
            yield [('ntime', t)]

        # Reduce by baseline
        for bl in _uniq_log2_range(na, nbl, 5):
            yield [('nbl', bl)]

    for reduction in _reduction():
        # Once the problem fits, further candidates would be no-ops
        if bytes_required <= mem_budget:
            break

        for dim, size in reduction:
            applied_reductions[dim] = size
            cube.update_dimension(dim, lower_extent=0, upper_extent=size)

        bytes_required = cube.bytes_required()

    # Log some information about the memory_budget
    # and dimension reduction
    # NOTE(review): the rb/mb arguments were cut off in the original listing
    # and are reconstructed from the placeholders in the message.
    montblanc.log.info(("Selected a solver memory budget of {rb} "
        "given a hard limit of {mb}.").format(
            rb=mbu.fmt_bytes(bytes_required),
            mb=mbu.fmt_bytes(mem_budget)))

    if applied_reductions:
        montblanc.log.info("The following dimension reductions "
            "were applied:")

        # items() rather than iteritems() so this runs on Python 2 and 3
        for k, v in applied_reductions.items():
            montblanc.log.info('{p}{d}: {id} => {rd}'.format
                (p=' '*4, d=k, id=original_sizes[k], rd=v))
    else:
        montblanc.log.info("No dimension reductions were applied.")

    return applied_reductions, bytes_required
Exemple #4
    def _thread_budget(self, slvr_cfg, A_sub, props):
        """
        Get memory budget and dimension reduction
        information from the CUDA device associated
        with the current thread and context

        Parameters:
            slvr_cfg : SolverConfiguration
                Solver Configuration variables. 'mem_budget' is optional
                and defaults to the free device memory less 200 * ONE_MB.
            A_sub : iterable of dict
                Array descriptions for a sub-solver. Each entry is
                read via its 'name' and 'shape' keys.
            props : dict
                Properties and dimension sizes used to evaluate
                the array shapes. It is copied, not modified.

        Returns:
            A (P, modded_dims, required_mem) tuple, where P is a copy of
            props updated with the dimension reductions, modded_dims
            holds those reductions and required_mem is the number of
            bytes the arrays in A_sub require under P.

        Raises:
            MemoryError if no viable dimension configuration
            fits within the memory budget.
        """
        # NOTE(review): the arguments of the two log calls below were cut
        # off in the original listing and are reconstructed from the
        # placeholders in their messages.
        montblanc.log.debug('Budgeting in thread %s',
                            threading.current_thread())

        # Query free memory on this context
        (free_mem, total_mem) = cuda.mem_get_info()

        device = self.thread_local.context.get_device()

        montblanc.log.info('{d}: {t} total {f} free.'.format(
            d=device.name(),
            t=mbu.fmt_bytes(total_mem),
            f=mbu.fmt_bytes(free_mem)))

        # Work with a supplied memory budget, otherwise use
        # free memory less an amount equal to the upper size
        # of an NVIDIA context
        mem_budget = slvr_cfg.get('mem_budget', free_mem - 200 * ONE_MB)

        nsolvers = slvr_cfg.get(Options.NSOLVERS)
        na = slvr_cfg.get(Options.NA)
        nsrc = slvr_cfg.get(Options.SOURCE_BATCH_SIZE)
        src_str_list = [Options.NSRC] + mbu.source_nr_vars()
        src_reduction_str = '&'.join(
            ['%s=%s' % (nr_var, nsrc) for nr_var in src_str_list])

        ntime_split = np.int32(np.ceil(100.0 / nsolvers))
        ntime_split_str = 'ntime={n}'.format(n=ntime_split)

        # Figure out a viable dimension configuration
        # given the total problem size
        viable, modded_dims = mbu.viable_dim_config(mem_budget, A_sub, props, [
            ntime_split_str, src_reduction_str, 'ntime',
            'nbl={na}&na={na}'.format(na=na), 'nchan=50%'
        ], nsolvers)

        # Create property dictionary with updated
        # dimensions.
        P = props.copy()
        # Without this the memory requirement and the returned properties
        # would describe the original, unreduced problem.
        P.update(modded_dims)

        required_mem = mbu.dict_array_bytes_required(A_sub, P)

        if not viable:
            # items() rather than iteritems() so this runs on Python 2 and 3
            dim_set_str = ', '.join(
                ['%s=%s' % (k, v) for k, v in modded_dims.items()])

            # Largest arrays first, since the message below
            # presents them as the biggest offenders
            ary_list_str = '\n'.join([
                '%-*s %-*s %s' %
                (15, a['name'], 10, mbu.fmt_bytes(mbu.dict_array_bytes(
                    a, P)), mbu.shape_from_str_tuple(a['shape'], P))
                for a in sorted(A_sub,
                                key=lambda a: mbu.dict_array_bytes(a, P),
                                reverse=True)])

            raise MemoryError(
                "Tried reducing the problem size "
                "by setting '%s' on all arrays, "
                "but the resultant required memory of %s "
                "for each of %d solvers is too big "
                "to fit within the memory budget of %s. "
                "List of biggests offenders:\n%s "
                "\nSplitting the problem along the "
                "channel dimension needs to be "
                "implemented." %
                (dim_set_str, mbu.fmt_bytes(required_mem), nsolvers,
                 mbu.fmt_bytes(mem_budget), ary_list_str))

        return P, modded_dims, required_mem
