Beispiel #1
0
 def _analyze_call_set_REP(self, lhs, args, array_dists, fdef=None):
     """Force the array-like operands and result of a call to REP.

     Every argument in ``args`` and the call target ``lhs`` is checked
     against ``self.typemap``; those that are arrays, array containers or
     dataframes get ``Distribution.REP`` written into ``array_dists``.

     Args:
         lhs: name of the variable receiving the call result (used as a
             key into ``self.typemap`` and ``array_dists``).
         args: iterable of call arguments; each must expose ``.name``.
         array_dists: mapping of variable name -> distribution, updated
             in place.
         fdef: only used in the debug message to identify the call.
     """
     typemap = self.typemap

     def _carries_distribution(name):
         # same three-way test is applied to arguments and to the output
         return (is_array(typemap, name)
                 or is_array_container(typemap, name)
                 or isinstance(typemap[name], DataFrameType))

     for arg in args:
         arg_name = arg.name
         if not _carries_distribution(arg_name):
             continue
         dprint("dist setting call arg REP {} in {}".format(
             arg_name, fdef))
         array_dists[arg_name] = Distribution.REP

     if _carries_distribution(lhs):
         dprint("dist setting call out REP {} in {}".format(lhs, fdef))
         array_dists[lhs] = Distribution.REP
Beispiel #2
0
 def _set_REP(self, var_list, array_dists):
     """Mark every array-like variable in ``var_list`` as REP.

     Variables whose type in ``self.typemap`` is an array, an array
     container, a series or a dataframe get ``Distribution.REP`` in
     ``array_dists``. If a variable is defined by a ``build_tuple``
     expression, the tuple's items are processed recursively.

     Args:
         var_list: iterable of IR variables (each exposes ``.name``).
         array_dists: mapping of variable name -> distribution, updated
             in place.
     """
     typemap = self.typemap
     for entry in var_list:
         name = entry.name
         # SeriesType is checked here as well: it can come from an Arg
         # node and could carry a user-defined distribution
         is_distributable = (
             is_array(typemap, name)
             or is_array_container(typemap, name)
             or isinstance(typemap[name], (SeriesType, DataFrameType)))
         if is_distributable:
             dprint("dist setting REP {}".format(name))
             array_dists[name] = Distribution.REP

         # tuples of arrays: descend into the items of the build_tuple
         # (guard() yields None when the definition cannot be found,
         # which the isinstance test below rejects)
         definition = guard(get_definition, self.func_ir, entry)
         if isinstance(definition, ir.Expr) and definition.op == 'build_tuple':
             self._set_REP(definition.items, array_dists)
Beispiel #3
0
    def _analyze_assign(self, inst, array_dists, parfor_dists):
        """Update ``array_dists`` for a single assignment instruction.

        The right-hand side of ``inst`` is classified (variable copy,
        in-place binop, getitem, tuple build, attribute access, call,
        container iteration, function argument, ...) and the matching
        distribution rule is applied. Anything not recognised falls back
        to marking all variables of the instruction as REP.

        Args:
            inst: assignment node; ``inst.target`` and ``inst.value`` are
                read.
            array_dists: mapping of variable name -> distribution, updated
                in place.
            parfor_dists: not used by this method.
        """
        lhs = inst.target.name
        rhs = inst.value
        # a return cast is treated like a plain assignment of its operand
        if isinstance(rhs, ir.Expr) and rhs.op == 'cast':
            rhs = rhs.value

        # plain copy of an array-like variable: both sides must agree
        if isinstance(rhs, ir.Var) and (
                is_array(self.typemap, lhs)
                or isinstance(self.typemap[lhs], (SeriesType, DataFrameType))
                or is_array_container(self.typemap, lhs)):
            self._meet_array_dists(lhs, rhs.name, array_dists)
            return

        # operation name of the expression, or None for non-expression nodes
        op = rhs.op if isinstance(rhs, ir.Expr) else None

        if is_array(self.typemap, lhs) and op == 'inplace_binop':
            # distributions of all 3 variables should meet (lhs, arg1, arg2)
            first = rhs.lhs.name
            second = rhs.rhs.name
            met = self._meet_array_dists(first, second, array_dists)
            met = self._meet_array_dists(first, lhs, array_dists, met)
            self._meet_array_dists(first, second, array_dists, met)
            return

        if op in ('getitem', 'static_getitem'):
            self._analyze_getitem(inst, lhs, rhs, array_dists)
            return

        if op == 'build_tuple':
            # parallel arrays can be packed and unpacked from tuples
            # e.g. boolean array index in test_getitem_multidim
            return

        if op == 'getattr':
            attr = rhs.attr
            if attr == 'T' and is_array(self.typemap, lhs):
                # array and its transpose have same distributions
                self._meet_array_dists(lhs, rhs.value.name, array_dists)
                # keep lhs in table for dot() handling
                self._T_arrs.add(lhs)
                return
            if (isinstance(self.typemap[rhs.value.name], DataFrameType)
                    and attr == 'to_csv'):
                return
            neutral_attrs = (
                'shape', 'ndim', 'size', 'strides', 'dtype', 'itemsize',
                'astype', 'reshape', 'ctypes', 'transpose', 'tofile', 'copy',
            )
            if attr in neutral_attrs:
                # X.shape doesn't affect X distribution
                return
            # any other attribute access ends up in the REP fallback below

        if op == 'call':
            self._analyze_call(lhs, rhs, rhs.func.name, rhs.args, array_dists)
            return

        # handle for A in arr_container: ...
        # A = pair_first(iternext(getiter(arr_container)))
        # TODO: support getitem of container
        if op == 'pair_first' and is_array(self.typemap, lhs):
            container = guard(_get_pair_first_container, self.func_ir, rhs)
            if container is not None:
                self._meet_array_dists(lhs, container.name, array_dists)
            # when the container is not found, nothing is recorded for lhs
            return

        if op in ('getiter', 'iternext'):
            # analyze array container access in pair_first
            return

        if isinstance(rhs, ir.Arg):
            distributed_key = 'distributed'
            threaded_key = 'threaded'

            # make sure both metadata entries exist before looking them up
            for key in (distributed_key, threaded_key):
                if key not in self.metadata:
                    self.metadata[key] = {}

            if rhs.name in self.metadata[distributed_key]:
                # do not overwrite a distribution that is already recorded
                if lhs not in array_dists:
                    array_dists[lhs] = Distribution.OneD
            elif rhs.name in self.metadata[threaded_key]:
                if lhs not in array_dists:
                    array_dists[lhs] = Distribution.Thread
            else:
                dprint("replicated input ", rhs.name, lhs)
                self._set_REP([inst.target], array_dists)
            return

        # unrecognised right-hand side: be conservative and replicate
        self._set_REP(inst.list_vars(), array_dists)
        return