Example #1
    def __init__(self, exps=None, cs=None, nomials=None, simplify=True):
        if nomials and (exps or cs):
            raise ValueError("The NomialData initializor accepts either"
                             " exps and cs, or nomials, but not both.")
        elif nomials:
            self.nomials = nomials
            exps = functools_reduce(add, (tuple(s.exps) for s in nomials))
            cs = np.hstack([mag(s.cs) for s in nomials])  # list, not generator: np.hstack needs a sequence
            simplify = False  # nomials have already been simplified
        elif exps is None or cs is None:
            raise ValueError("creation of a NomialData requires exps and cs.")

        if simplify:
            exps, cs = simplify_exps_and_cs(exps, cs)
        self.exps, self.cs = exps, cs
        self.any_nonpositive_cs = any(mag(c) <= 0 for c in self.cs)
        self.varlocs, self.varstrs = locate_vars(self.exps)
        self.values = {vk: vk.descr["value"] for vk in self.varlocs
                       if "value" in vk.descr}
        if nomials:
            self.units = tuple(s.units for s in nomials)
        elif isinstance(self.cs, Quantity):
            self.units = Quantity(1, self.cs.units)
        else:
            self.units = None

        self._hashvalue = None
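A note on the two concatenation idioms above: functools_reduce(add, ...) joins the per-nomial exps tuples end to end, while np.hstack flattens the coefficient arrays into one. A minimal sketch with hypothetical values:

# Hypothetical sketch of the concatenation idioms used in __init__ above.
from functools import reduce as functools_reduce
from operator import add
import numpy as np

per_nomial_exps = ((("x",), ("y",)), (("z",),))          # hypothetical exps tuples
exps = functools_reduce(add, per_nomial_exps)            # (('x',), ('y',), ('z',))
cs = np.hstack([np.array([1.0, 2.0]), np.array([3.0])])  # array([1., 2., 3.])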
Example #2
    def init_from_nomials(self, nomials):
        """Way to initialize from nomials. Calls __init__.
        Used by subclass __init__ methods.
        """
        exps = functools_reduce(add, (tuple(s.exps) for s in nomials))
        cs = np.hstack([mag(s.cs) for s in nomials])  # list, not generator: np.hstack needs a sequence
        # nomials are already simplified, so simplify=False
        NomialData.__init__(self, exps, cs, simplify=False)
        self.units = tuple(s.units for s in nomials)
Example #3
    def _collect_explicit_graded(cls, block_structure):
        """
        Collect the 'explicit_graded' field for every block.
        """

        def _set_field(block_key, field_value):
            """
            Sets the explicit graded field to the given value for the
            given block.
            """
            block_structure.set_transformer_block_field(block_key, cls, cls.EXPLICIT_GRADED_FIELD_NAME, field_value)

        def _get_field(block_key):
            """
            Gets the explicit graded field for the given block.
            """
            return block_structure.get_transformer_block_field(block_key, cls, cls.EXPLICIT_GRADED_FIELD_NAME)

        block_types_to_ignore = {"course", "chapter", "sequential"}

        for block_key in block_structure.topological_traversal():
            if block_key.block_type in block_types_to_ignore:
                _set_field(block_key, None)
            else:
                explicit_field_on_block = get_field_on_block(block_structure.get_xblock(block_key), "graded")
                if explicit_field_on_block is not None:
                    _set_field(block_key, explicit_field_on_block)
                else:
                    values_from_parents = [
                        _get_field(parent)
                        for parent in block_structure.get_parents(block_key)
                        if parent.block_type not in block_types_to_ignore
                    ]
                    non_null_values_from_parents = [value for value in values_from_parents if value is not None]
                    explicit_from_parents = functools_reduce(lambda x, y: x or y, non_null_values_from_parents, None)
                    _set_field(block_key, explicit_from_parents)
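The functools_reduce(lambda x, y: x or y, values, None) call above returns the first truthy value, falling back to the last falsy value, or to the None initializer for an empty list. A minimal sketch of that behavior:

# Sketch: "or"-reduce picks the first truthy value, else the last falsy
# one, else the initializer (None) when no parent carries the field.
from functools import reduce as functools_reduce

def pick(values):
    return functools_reduce(lambda x, y: x or y, values, None)

assert pick([False, True, False]) is True   # some parent explicitly graded
assert pick([False, False]) is False        # parents explicitly ungraded
assert pick([]) is None                     # no parent sets the field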
Example #4
def parse_result(result, constants, beforesubs, sweep={}, linkedsweep={},
                 freevar_sensitivity_tolerance=1e-4,
                 localmodel_sensitivity_requirement=0.1):
    "Parses a GP-like result dict into a SolutionArray-like dict."
    cost = result["cost"]
    freevariables = dict(result["variables"])
    sweepvariables = {var: val for var, val in constants.items()
                      if var in sweep or var in linkedsweep}
    constants = {var: val for var, val in constants.items()
                 if var not in sweepvariables}
    variables = dict(freevariables)
    variables.update(constants)
    variables.update(sweepvariables)
    sensitivities = dict(result["sensitivities"])

    # Remap monomials after substitution and simplification.
    #  The monomial sensitivities from the GP/SP are in terms of this
    #  smaller post-substitution list of monomials, so we need to map that
    #  back to the pre-substitution list.
    #
    #  Each "smap" is a list of HashVectors (mmaps),
    #    whose keys are monomial indexes pre-substitution,
    #    and whose values are the percentage of the simplified monomial's
    #    coefficient that came from that particular parent
    nu = result["sensitivities"]["monomials"]
    # HACK: simplified solves need a mutated beforesubs, as created in Model
    if hasattr(beforesubs, "smaps"):
        nu_ = np.zeros(len(beforesubs.cs))
        little_counter, big_counter = 0, 0
        for j, smap in enumerate(beforesubs.smaps):
            for i, mmap in enumerate(smap):
                for idx, percentage in mmap.items():
                    nu_[idx + big_counter] += percentage*nu[i + little_counter]
            little_counter += len(smap)
            big_counter += len(beforesubs.signomials[j].cs)
    else:
        nu_ = nu  # assumed pass-through: without smaps there is no remapping to undo
    sensitivities["monomials"] = nu_

    sens_vars = {var: sum([beforesubs.exps[i][var]*nu_[i] for i in locs])
                 for (var, locs) in beforesubs.varlocs.items()}
    sensitivities["variables"] = sens_vars

    # free-variable sensitivities must be <= some epsilon
    for var, S in sensitivities["variables"].items():
        if var in freevariables and abs(S) > freevar_sensitivity_tolerance:
            raise ValueError("free variable too sensitive: S_{%s} = "
                             "%0.2e" % (var, S))

    localexp = {var: S for (var, S) in sens_vars.items()
                if abs(S) >= localmodel_sensitivity_requirement}
    localcs = (variables[var]**-S for (var, S) in localexp.items())
    localc = functools_reduce(mul, localcs, cost)
    localmodel = Monomial(localexp, localc)

    # vectorvar substitution
    veckeys = set()
    for var in beforesubs.varlocs:
        if "idx" in var.descr and "shape" in var.descr:
            descr = dict(var.descr)
            idx = descr.pop("idx")
            if "value" in descr:
                descr.pop("value")
            if "units" in descr:
                units = descr.pop("units")
                veckey = VarKey(**descr)
                veckey.descr["units"] = units
            else:
                veckey = VarKey(**descr)
            veckeys.add(veckey)

            for vardict in [variables, sensitivities["variables"],
                            constants, sweepvariables, freevariables]:
                if var in vardict:
                    if veckey in vardict:
                        vardict[veckey][idx] = vardict[var]
                    else:
                        vardict[veckey] = np.full(var.descr["shape"], np.nan)
                        vardict[veckey][idx] = vardict[var]

                    del vardict[var]

    if hasattr(beforesubs, "varkeysubs"):
        for origvk, subvk in beforesubs.varkeysubs.items():
            for data in [constants, sweepvariables, freevariables, variables,
                         sensitivities["variables"]]:
                if subvk in data:
                    qty = isinstance(origvk.units, Quantity)
                    if data is sensitivities["variables"] or not qty:
                        data[origvk] = data[subvk]
                    else:
                        scale = (subvk.units/origvk.units).to("dimensionless")
                        data[origvk] = data[subvk] * scale

    return dict(cost=cost,
                constants=constants,
                sweepvariables=sweepvariables,
                freevariables=freevariables,
                variables=variables,
                sensitivities=sensitivities,
                localmodel=localmodel)
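The localmodel coefficient above is the cost multiplied by variables[var]**-S for each sufficiently sensitive variable, accumulated with functools_reduce(mul, localcs, cost). A small numeric sketch with hypothetical values:

# Hypothetical sketch of the local-model coefficient computed above.
from functools import reduce as functools_reduce
from operator import mul

cost = 10.0
localexp = {"x": 2.0, "y": -1.0}   # hypothetical variable sensitivities
variables = {"x": 3.0, "y": 4.0}   # hypothetical solved values
localcs = (variables[var]**-S for var, S in localexp.items())
localc = functools_reduce(mul, localcs, cost)  # 10 * 3**-2.0 * 4**1.0 ≈ 4.44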
Example #5
    def get_query(self):
        return functools_reduce(lambda q, item_id: q | Q(id=item_id), self.ids,
                                Q())
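For self.ids == [1, 2, 3], the reduce above builds Q() | Q(id=1) | Q(id=2) | Q(id=3), an OR filter across all ids. A sketch assuming Django's django.db.models.Q and hypothetical ids:

# Sketch assuming Django's Q objects; the ids are hypothetical.
from functools import reduce as functools_reduce
from django.db.models import Q

ids = [1, 2, 3]
query = functools_reduce(lambda q, item_id: q | Q(id=item_id), ids, Q())
# equivalent to: Q() | Q(id=1) | Q(id=2) | Q(id=3)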
Example #6
def quantized_inner_product_check_rule(x,
                                       w,
                                       b,
                                       scale_q,
                                       offset_q,
                                       scale_deq_req,
                                       offset_req,
                                       y,
                                       quant_algo,
                                       scale_sqrt,
                                       num_output,
                                       transpose,
                                       bias_term,
                                       axis,
                                       kernel_name="quantized_inner_product"):
    """
    Check the legality of each entry
    """
    # x info
    shape_x = x.get('shape')
    dtype_x = x.get('dtype')
    format_x = x.get('format')
    m_shape = shape_x[0]
    km_shape = shape_x[1] * shape_x[2] * shape_x[3] * shape_x[4]

    if functools_reduce(lambda x, y: x * y, shape_x) >= SHAPE_SIZE_LIMIT:
        raise RuntimeError("The shape_x exceed 32 bit limitations! ")

    if shape_x[-1] != 32:
        raise RuntimeError("For non_quant 'NC1HWC0' x, the C0 must be 32!")

    util.check_dtype_rule(dtype_x, ['uint8'])

    if format_x != 'NC1HWC0':
        raise RuntimeError("For IP situation, x format must be NC1HWC0!")

    # gevm
    is_gevm = m_shape == 1
    if is_gevm:
        if km_shape % 512 != 0:
            raise RuntimeError("for quant_gevm, KM/KN must be multi of 512!")

    # w info
    shape_w = w.get('shape')
    dtype_w = w.get('dtype')
    format_w = w.get('format')

    if functools_reduce(lambda x, y: x * y, shape_w) >= SHAPE_SIZE_LIMIT:
        raise RuntimeError("The shape_w exceed 32 bit limitations! ")

    util.check_dtype_rule(dtype_w, ['int8'])

    if format_w != 'FRACTAL_Z':
        raise RuntimeError(
            "For quant IP situation, w format must be FRACTAL_Z!")

    if shape_w[2] != 16 or shape_w[3] != 32:
        raise RuntimeError(
            "For quant IP situation, last two dim must be 16 and 32!")

    kn_shape = shape_w[0] * shape_w[3]
    n_shape = shape_w[1] * shape_w[2]

    # Check shape
    if km_shape != kn_shape:
        raise RuntimeError("KM of input_x must be equal to KN of input_w!")

    # y info
    shape_y = y.get('shape')
    dtype_y = y.get('dtype')
    format_y = y.get('format')

    if shape_y[-1] != 16:
        raise RuntimeError("For Quant 'NC1HWC0' y, the C0 must be 32!")

    util.check_dtype_rule(dtype_y, ['float16'])

    if format_y != 'NC1HWC0':
        raise RuntimeError("For IP situation, y format must be NC1HWC0!")

    # b info
    if bias_term:
        shape_b = b.get('shape')
        dtype_b = b.get('dtype')
        format_b = b.get('format')
        b_size = shape_b[1] * shape_b[4]
        # Check info
        util.check_dtype_rule(dtype_b, ['int32'])
        if format_b != 'NC1HWC0':
            raise RuntimeError("For IP situation, b format must be NC1HWC0!")
        if b_size != n_shape:
            raise RuntimeError(
                "For bias, the C1*C0 must equal to aligned_Cout!")
    else:
        if b is not None:
            raise RuntimeError("for bias_term false, the b must be an None!")

    if transpose:
        raise RuntimeError("for quantized IP, only support transpose false")
Example #7
    def __init__(self, var, indices, updates, var_out, nd_flag, kernel_name,
                 compute_type):
        """
        Init scatter base parameters

        Parameters
        ----------
        var: dict
            data of input
            datatype supports float32,float16,int32,int8,uint8
        indices: dict
            data of indices
            datatype supports int32
        updates: dict
            data of updates
            datatype supports float32,float16,int32,int8,uint8
        var_out: dict
            data of output
        nd_flag: bool
            if this op is nd operator
        kernel_name: str
            the name of the operator
        compute_type: str
            the compute type of scatter
        Returns
        -------
        None
        """
        self.tik_instance = tik.Tik(tik.Dprofile())
        self.nd_flag = nd_flag
        self.var_shape = var.get("shape")
        self.var_dtype = var.get("dtype").lower()
        self.indices_shape = indices.get("shape")
        self.indices_dtype = indices.get("dtype").lower()
        self.updates_shape = updates.get("shape")
        self.updates_dtype = updates.get("dtype").lower()
        self.var_ele_num = functools_reduce(lambda x, y: x * y, self.var_shape)
        self.indices_num = functools_reduce(lambda x, y: x * y,
                                            self.indices_shape)
        self.updates_num = functools_reduce(lambda x, y: x * y,
                                            self.updates_shape)
        self.kernel_name = kernel_name

        if self.indices_shape == (1,) and \
                len(self.var_shape)-len(self.updates_shape) == 1:
            if not nd_flag:
                self.updates_shape = (1, ) + self.updates_shape

        self.check_param(var_out)
        if nd_flag:
            if self.indices_shape[-1] == len(self.var_shape):
                self.update_data_num = 1
            else:
                self.update_data_num = functools_reduce(
                    lambda x, y: x * y,
                    self.var_shape[self.indices_shape[-1]:])
            self.max_indice = functools_reduce(
                lambda x, y: x * y, self.var_shape[0:self.indices_shape[-1]])
            self.index_dims = self.indices_shape[-1]
        else:
            if len(self.var_shape) > 1:
                self.update_data_num = functools_reduce(
                    lambda x, y: x * y, self.var_shape[1:])
            else:
                self.update_data_num = 1
            self.max_indice = self.var_shape[0]
            self.index_dims = 1

        self.compute_type = compute_type

        self.ub_size_bytes = (
            tbe_platform.cce_conf.get_soc_spec(tbe_platform.cce_conf.UB_SIZE) -
            8192)
        self.var_dtype_bytes_size = tbe_platform.cce_intrin.get_bit_len(
            self.var_dtype) // 8
        self.indices_dtype_bytes_size = tbe_platform.cce_intrin.get_bit_len(
            self.indices_dtype) // 8
        self.var_data_each_block = 32 // self.var_dtype_bytes_size
        self.indices_data_each_block = 32 // self.indices_dtype_bytes_size
        self.indices_ub_number = 0
        self.updates_ub_number = 0

        self.index_loop_num = 0

        self.max_num_one_repeat = 128
        if self.var_dtype in ("float32", "int32"):
            self.max_num_one_repeat = 64

        if self.update_data_num < self.var_data_each_block:
            self.block_num = 1
        else:
            ai_core_num = tbe_platform.cce_conf.get_soc_spec(
                tbe_platform.cce_conf.CORE_NUM)
            self.indice_step = math.ceil(self.max_indice / ai_core_num)
            self.block_num = math.ceil(self.max_indice / self.indice_step)

        self.var_gm = self.tik_instance.Tensor(self.var_dtype,
                                               self.var_shape,
                                               name="var_gm",
                                               scope=tik.scope_gm)
        self.indices_gm = self.tik_instance.Tensor(self.indices_dtype,
                                                   self.indices_shape,
                                                   name="indices_gm",
                                                   scope=tik.scope_gm)
        self.updates_gm = self.tik_instance.Tensor(self.updates_dtype,
                                                   self.updates_shape,
                                                   name="updates_gm",
                                                   scope=tik.scope_gm)
        self.out_gm = self.tik_instance.Tensor(self.var_dtype,
                                               self.var_shape,
                                               name="out_gm",
                                               scope=tik.scope_gm)

        self.vconv_dst_dtype = "float16"

        self.init_ub_tensor_para()
        self.var_vconv_ub = None
        self.updates_vconv_ub = None
        self.var_tile_vconv_ub = None
        self.updates_tile_vconv_ub = None

        self.var_ub = None
        self.updates_ub = None
        self.indices_ub = None
        self.var_tile_ub = None
        self.updates_tile_ub = None

        self.var_read_index = None
        self.updates_read_index = None
        self.indices_loop_index = None
        self.indices_tmp = None
Example #8
    def localsolve(self, solver=None, verbosity=1, x0=None, rel_tol=1e-4,
                   iteration_limit=50, *args, **kwargs):
        """Locally solves a SignomialProgram and returns the solution.

        Arguments
        ---------
        solver : str or function (optional)
            By default uses one of the solvers found during installation.
            If set to "mosek", "mosek_cli", or "cvxopt", uses that solver.
            If set to a function, passes that function cs, A, p_idxs, and k.
        verbosity : int (optional)
            If greater than 0, prints solve time and number of iterations.
            Each GP is created and solved with verbosity one less than this, so
            if greater than 1, prints solver name and time for each GP.
        x0 : dict (optional)
            Initial location to approximate signomials about.
        rel_tol : float
            Iteration ends when the relative difference between two
            consecutive solves' objective values drops below this.
        iteration_limit : int
            Maximum GP iterations allowed.
        *args, **kwargs :
            Passed to solver function.


        Returns
        -------
        result : dict
            A dictionary containing the translated solver result.
        """
        if verbosity > 0:
            print("Beginning signomial solve.")
            self.starttime = time()
        self.gps = []  # NOTE: SIDE EFFECTS
        prevcost, cost, rel_improvement = None, None, None
        while rel_improvement is None or rel_improvement > rel_tol:
            if len(self.gps) > iteration_limit:
                raise RuntimeWarning("""problem unsolved after %s iterations.

    The last result is available in Model.program.gps[-1].result. If the gps
    appear to be converging, you may wish to increase the iteration limit by
    calling .localsolve(..., iteration_limit=NEWLIMIT).""" % len(self.gps))
            gp = self.step(x0, verbosity=verbosity-1)
            self.gps.append(gp)  # NOTE: SIDE EFFECTS
            try:
                result = gp.solve(solver, verbosity-1, *args, **kwargs)
            except (RuntimeWarning, ValueError):
                nearest_feasible = feasibility_model(gp, "max")
                self.gps.append(nearest_feasible)
                result = nearest_feasible.solve(verbosity=verbosity-1)
                result["cost"] = None
            x0 = result["variables"]
            prevcost, cost = cost, result["cost"]
            if prevcost and cost:
                rel_improvement = abs(prevcost-cost)/(prevcost + cost)
            else:
                rel_improvement = None
        # solved successfully!
        if verbosity > 0:
            print("Solving took %i GP solves" % len(self.gps)
                  + " and %.3g seconds." % (time() - self.starttime))

        # parse the result and return nu's of original monomials from
        #  variable sensitivities
        nu = result["sensitivities"]["monomials"]
        sens_vars = {var: sum([gp.exps[i][var]*nu[i] for i in locs])
                     for (var, locs) in gp.varlocs.items()}
        nu_ = []
        for signomial in self.signomials:
            for c, exp in zip(signomial.cs, signomial.exps):
                var_ss = [sens_vars[var]*val for var, val in exp.items()]
                nu_.append(functools_reduce(mul, var_ss, np.sign(c)))
        result["sensitivities"]["monomials"] = np.array(nu_)
        # TODO: SP sensitivities are weird, and potentially incorrect

        self.result = result  # NOTE: SIDE EFFECTS
        return result
Example #9
    def __init__(self, src, dst, src_format, dst_format, kernel_name):
        """
        Init zn_2_hwcn_lstm parameters

        Parameters
        ----------
        src : dict, shape and dtype of input.
        dst: dict, shape and dtype of output.
        src_format: str, source data format, can be fractal_zn.
        dst_format: str, target data format, can be hwcn.
        kernel_name: str, kernel name, default value is "zn_2_hwcn_lstm".
        Returns
        -------
        None
        """
        self.tik_instance = tik.Tik(tik.Dprofile())
        self.src_format = src_format
        self.dst_format = dst_format
        self.kernel_name = kernel_name
        self.src_shape = src.get("shape")
        self.src_dtype = src.get("dtype").lower()
        self.dst_shape = dst.get("shape")
        self.dst_dtype = dst.get("dtype").lower()
        self.h = self.dst_shape[3] // 4
        self.i = self.dst_shape[2] - self.h
        self.h_align = math.ceil(self.h / 16) * 16
        self.i_align = math.ceil(self.i / 16) * 16
        self.src_data_num = functools_reduce(lambda x, y: x * y,
                                             self.src_shape[:])
        self.ub_size_bytes = UB_SIZE - 9216
        self.core_num = MAX_CORE_NUM
        self.src_dtype_bytes_size = tbe_platform.cce_intrin.get_bit_len(
            self.src_dtype) // 8
        self.src_data_each_block = 32 // self.src_dtype_bytes_size
        self.src_gm = self.tik_instance.Tensor(self.src_dtype,
                                               self.src_shape,
                                               name="src_gm",
                                               scope=tik.scope_gm)
        self.dst_gm = self.tik_instance.Tensor(self.dst_dtype,
                                               self.dst_shape,
                                               name="dst_gm",
                                               scope=tik.scope_gm)
        self.src_burst_len = 0
        self.burst_len = 0
        self.dst_burst_len = 0
        self.src_ub_number = 0
        self.temp_ub_number = 0
        self.half_ub_number = 0
        self.each_data_num = 0
        self.each_core_data = 0
        self.last_core_data = 0
        self.c_num = 0
        self.i_flag = 0
        self.n0_ni_c0 = (self.src_shape[1] * self.src_shape[2] *
                         self.src_shape[3])
        self.ni_c0 = self.src_shape[2] * self.src_shape[3]
        self.temp_c = None
        self.before_c = None
        self.core_loop_index = None
        self.src_ub = None
        self.temp_ub = None
        self.tile_ub = None
        self.temp_burst_len = None
        self.remain_data = None
        self.temp_data = None
        ai_core_num = tbe_platform.cce_conf.get_soc_spec(
            tbe_platform.cce_conf.CORE_NUM)
        self.c_each_core = math.ceil(self.src_shape[0] / ai_core_num)
        self.core_num = math.ceil(self.src_shape[0] / self.c_each_core)
        self.each_core_data = self.c_each_core * self.n0_ni_c0
        self.c_last_core = (self.src_shape[0] - self.c_each_core *
                            (self.core_num - 1))
        self.last_core_data = self.c_last_core * self.n0_ni_c0
        data_num = (self.i % (self.c_each_core * 16)) * self.dst_shape[3]
        if (data_num < self.src_data_each_block) and data_num > 0:
            self.core_num = self.change_core_num()
            self.each_core_data = self.c_each_core * self.n0_ni_c0
            self.c_last_core = (self.src_shape[0] - self.c_each_core *
                                (self.core_num - 1))
            self.last_core_data = self.c_last_core * self.n0_ni_c0
        self.check_param()
Example #10
def fake_quant_per_layer(x,
                         min_val,
                         max_val,
                         y,
                         symmetric,
                         narrow_range,
                         num_bits,
                         kernel_name="fake_quant_per_layer"):
    """FakeQuantPerLayer"""
    input_shape = x.get("shape")
    input_dtype = x.get("dtype")
    min_shape = min_val.get("ori_shape")
    min_dtype = min_val.get("dtype")
    max_shape = max_val.get("ori_shape")
    max_dtype = max_val.get("dtype")

    min_shape = util.scalar2tensor_one(min_shape)
    max_shape = util.scalar2tensor_one(max_shape)
    util.check_kernel_name(kernel_name)
    util.check_shape_rule(input_shape)
    util.check_shape_rule(min_shape, 1, 1, 1)
    util.check_shape_rule(max_shape, 1, 1, 1)
    util.check_tensor_shape_size(input_shape)
    util.check_tensor_shape_size(min_shape)
    util.check_tensor_shape_size(max_shape)

    check_list = ["float32", "float16"]
    x_dtype = input_dtype.lower()
    min_dtype = min_dtype.lower()
    max_dtype = max_dtype.lower()
    util.check_dtype_rule(x_dtype, check_list)
    util.check_dtype_rule(min_dtype, check_list)
    util.check_dtype_rule(max_dtype, check_list)

    input_shape = (functools_reduce(lambda x, y: x * y, input_shape[:]), )
    shape_min, _, _ = util.produce_shapes(min_shape, input_shape)

    if symmetric:
        quant_min = 0 - 2**(num_bits - 1)
        quant_max = 2**(num_bits - 1) - 1
    else:
        quant_min = 0
        quant_max = 2**num_bits - 1
    if narrow_range:
        quant_min = quant_min + 1

    input_data = tvm.placeholder(input_shape, name="x", dtype=x_dtype)
    min_data = tvm.placeholder(shape_min, name="min_data", dtype=min_dtype)
    max_data = tvm.placeholder(shape_min, name="max_data", dtype=max_dtype)
    res = fake_quant_per_layer_compute(input_data, min_data, max_data, y,
                                       quant_min, quant_max, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    tensor_list = [input_data, min_data, max_data, res]
    config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": tensor_list
    }

    te.lang.cce.cce_build_code(sch, config)