예제 #1
0
파일: process.py 프로젝트: slee1009/liam2
    def store_result(self, result, context):
        """Store ``result`` as the value of this variable and invalidate its cache.

        Temporary variables are written to ``self.entity.temp_variables``
        without any type check. Other variables are converted with
        ``np.asarray`` and written to ``self.entity.array``, after checking
        (through ``type_to_idx``) that the type of the result does not rank
        higher than the type of the target field.

        Raises an Exception when the type of the result ranks higher than the
        type of the target field.
        """
        is_array = isinstance(result, (np.ndarray, la.LArray))
        res_type = result.dtype.type if is_array else type(result)

        if not self.temporary:
            # self.entity.array[self.name] cannot be stored/cached because the
            # array object can change (eg when enlarging it due to births)
            target = self.entity.array
            result = np.asarray(result)
            # TODO: assert type for temporary variables too
            field_rank = type_to_idx[target[self.name].dtype.type]
            result_rank = type_to_idx[res_type]
            if result_rank > field_rank:
                result_type_name = idx_to_type[result_rank].__name__
                field_type_name = idx_to_type[field_rank].__name__
                raise Exception(
                    "trying to store %s value into '%s' field which is of "
                    "type %s" % (result_type_name, self.name, field_type_name))
        else:
            target = self.entity.temp_variables

        # the whole column is replaced at once
        target[self.name] = result

        # the cached values depending on this variable are no longer valid
        period = context.period
        if isinstance(period, np.ndarray):
            # only 0d arrays are accepted as period
            assert np.isscalar(period) or not period.shape
            period = int(period)
        entity_name = context.entity_name
        changed_variable = Variable(self.entity, self.name)
        expr_cache.invalidate(period, entity_name, changed_variable)
예제 #2
0
파일: exprmisc.py 프로젝트: slee1009/liam2
    def compute(self, context, coefficients):
        """Evaluate the sum of the terms described by ``coefficients``.

        ``coefficients`` must be an LArray with a 'fields' axis. Each label
        along that axis produces one term: the coefficient itself for the
        'constant' label, otherwise the entity variable of that name multiplied
        by the coefficient. The terms are added together and the resulting
        expression is evaluated in ``context``.
        """
        assert isinstance(coefficients, la.LArray)

        # XXX: change to "variable"? because we can use temporary variables too!
        #      or even to "expressions" if we want to support expressions.
        # FIXME013: in any case, it should be singular
        field_axis = coefficients.axes['fields']
        other_axes = coefficients.axes - field_axis

        total = None
        # XXX: instead of retrieving labels along a dimension & splitting
        #      manually, we should have a "split" operation in LArray
        #      (opposite of stack)
        for label in field_axis.labels:
            coef = coefficients[label]

            if other_axes:
                # the other (remaining) dimensions are indexed automatically
                coef = index_array_by_variables(coef, context, other_axes)

            coef_var = self.add_tmp_var(context, coef)
            if label == 'constant':
                term = coef_var
            else:
                # XXX: should I reuse variables instances defined in the entity
                # at context.entity.variables[name]
                # XXX: parse expressions instead of only simple Variable?
                term = _mul(Variable(context.entity, label), coef_var)

            total = term if total is None else _plus(total, term)
        return expr_eval(total, context)
예제 #3
0
 def build_regression_expr(self, expr, mult=0.0, error_var=None):
     """Return ``expr`` extended with the optional error terms.

     When ``error_var`` is not None, a Variable of that name is added to the
     expression. When ``mult`` is truthy, ``Normal(0, 1) * mult`` is added
     too. With neither, ``expr`` is returned unchanged.
     """
     if error_var is not None:
         # expr + error_var
         error_term = Variable(None, error_var)
         expr = BinaryOp('+', expr, error_term)
     if mult:
         # expr + normal(0, 1) * mult
         noise_term = BinaryOp('*', Normal(0, 1), mult)
         expr = BinaryOp('+', expr, noise_term)
     return expr
예제 #4
0
파일: alignment.py 프로젝트: slee1009/liam2
    def _eval_need(self,
                   context,
                   need,
                   expressions,
                   possible_values,
                   expressions_context=None):
        """Normalize the ``need`` array of an alignment and its axes description.

        ``need`` must be a numpy array or an LArray. When it is an LArray,
        missing ``expressions`` default to one Variable per axis name and
        missing ``possible_values`` default to the axes labels. If one of the
        expressions is 'period', that axis is removed using the current
        period via ``kill_axis``.

        Returns the (need, expressions, possible_values) tuple.
        Raises an Exception when expressions and possible_values do not have
        the same length.
        """
        assert isinstance(need, (np.ndarray, la.LArray))
        if expressions_context is None:
            expressions_context = context

        # A 0d array is converted to 1d. This can happen e.g. for
        # >>> b = True; x = ne.evaluate('where(b, 0.1, 0.2)')
        # >>> isinstance(x, np.ndarray)
        # True
        # >>> x.shape
        # ()
        if not need.shape:
            need = np.array([need])

        if isinstance(need, la.LArray):
            if not expressions:
                expressions = []
                for axis_name in need.axes.names:
                    expressions.append(
                        Variable(expressions_context.entity, axis_name))
            if not possible_values:
                possible_values = need.axes.labels

        assert isinstance(need, (np.ndarray, la.LArray))

        num_expressions = len(expressions)
        num_values = len(possible_values)
        if num_expressions != num_values:
            raise Exception("align() expressions and possible_values "
                            "have different length: %d vs %d" %
                            (num_expressions, num_values))

        expression_names = [str(e) for e in expressions]
        if 'period' in expression_names:
            expressions, possible_values, need = kill_axis(
                'period', context.period, expressions, possible_values, need)

        # NOTE: killing any axis where the value is constant for all
        # individuals satisfying the filter is not done here (an earlier
        # implementation based on isconstant() and kill_axis() was disabled).

        return need, expressions, possible_values
예제 #5
0
파일: links.py 프로젝트: slee1009/liam2
    def get(self, key, *args, **kwargs):
        """Return a LinkGet for ``key`` through this link.

        A string key is first resolved against the target entity: the link of
        that name if the entity has one, otherwise a Variable of that name.
        Any other key is passed to LinkGet as is, along with the extra
        arguments.
        """
        if isinstance(key, basestring):
            target = self._target_entity

            # entity.variables could be used instead but since local variables
            # are not in there (and links can currently point to them), that
            # would need a special case and would not make things any simpler.
            key = (target.links[key] if key in target.links
                   else Variable(target, key))

        return LinkGet(self, key, *args, **kwargs)
예제 #6
0
파일: links.py 프로젝트: slee1009/liam2
    def __getattr__(self, key):
        """Resolve ``key`` to a prefixed link or a prefixed Variable.

        Macros are refused with an Exception. A key naming a link returns a
        new link of the same class whose link field is prefixed with
        ``self.prefix``. Any other key returns a Variable named
        ``self.prefix + key``.
        """
        if key in self.macros:
            # NOTE: a disabled implementation collected the variables of the
            # macro and renamed them with self.prefix.
            raise Exception("Using macros with the 'other' link is not "
                            "supported yet")

        if key not in self.links:
            return Variable(self.entity, self.prefix + key)

        link = self.links[key]
        link_class = link.__class__
        # noinspection PyProtectedMember
        prefixed_field = self.prefix + link._link_field
        # noinspection PyProtectedMember
        return link_class(link._name, prefixed_field,
                          link._target_entity_name, link._target_entity)
예제 #7
0
파일: entities.py 프로젝트: jenniebui/liam2
 def get_group_context(context, varnames):
     """Return a copy of ``context`` where ``varnames`` are plain Variables.

     Both the context and the sub-context of the current entity (the one
     named by ``context['__entity__']``) are copied, so the original context
     is left untouched.
     """
     entity_name = context['__entity__']
     entity = context['__entities__'][entity_name]
     group_context = context.copy()
     entity_context = group_context[entity_name].copy()

     # A Variable is created for each name in varnames.
     # There is an obscure subtle bug here. get_group_context is used both for
     # functions and for "code blocks". For functions, where varnames
     # represent arguments, shadowing global variables with local variables
     # is probably fine, but for code blocks it means that a global
     # VariableMethodHybrid gets replaced by a simple Variable if the value of
     # that global VariableMethodHybrid is set anywhere in the method.
     # For example, this will fail:
     # age: age + 1
     # ageing:
     # - age: age + 1
     # - age()
     # This is not fixed because it is too obscure and VariableMethodHybrids
     # should not be used anyway.
     for varname in varnames:
         entity_context[varname] = Variable(entity, varname)

     group_context[entity_name] = entity_context
     return group_context
예제 #8
0
    def execute(self, s):
        """Parse the string ``s`` and evaluate it for the current entity.

        Raises an Exception when no entity or no period is currently set.
        Prints "done." when the evaluation returns None. Returns the result of
        the evaluation.
        """
        entity = self.entity
        if entity is None:
            raise Exception(entity_required)

        if self.period is None:
            raise Exception(period_required)

        entity_name = self.entity.name
        parse_ctx = self.parse_ctx.copy()
        local_parse_ctx = parse_ctx[entity_name].copy()

        # all currently defined temp_variables are added because otherwise
        # local variables (defined within a function) wouldn't be available
        for name in entity.temp_variables.keys():
            local_parse_ctx[name] = Variable(entity, name)
        parse_ctx[entity_name] = local_parse_ctx

        result = expr_eval(parse(s, parse_ctx, interactive=True),
                           self.eval_ctx)
        if result is None:
            print("done.")
        return result
예제 #9
0
파일: entities.py 프로젝트: jenniebui/liam2
    def variables(self):
        """Return the mapping of names to variables of this entity.

        The mapping is built on first use and stored in ``self._variables``.
        It contains a Variable for each field without a process of the same
        name, a VariableMethodHybrid for each field with a process of the same
        name and for each globally set variable which is not a field, plus
        the links of the entity.
        """
        if self._variables is not None:
            return self._variables

        processes = (list(self.process_strings.items())
                     if self.process_strings else [])

        # names of all processes (hybrid or not) of the entity
        process_names = set(k for k, v in processes if k is not None)

        # names of all entity variables (temporary or not) which are set
        # globally
        all_entity_variables = set(self.collect_predictors(processes))

        field_names = set(self.fields.names)
        plain_fields = field_names - process_names
        hybrid_fields = field_names & process_names
        global_temporaries = all_entity_variables - field_names

        # normal fields (non-callable/no hybrid variable-function for them)
        variables = {}
        for name, type_ in self.fields.name_types:
            if name in plain_fields:
                variables[name] = Variable(self, name, type_)

        if config.debug:
            print("hybrids (field and method):",
                  field_names & process_names)
        # callable fields (fields with a process of the same name)
        for name, type_ in self.fields.name_types:
            if name in hybrid_fields:
                variables[name] = VariableMethodHybrid(self, name, type_)

        if config.debug:
            print("hybrids (global temporary & method):",
                  all_entity_variables - field_names)
        # global temporaries (they are all callable).
        for name in global_temporaries:
            variables[name] = VariableMethodHybrid(self, name)

        variables.update(self.links)
        self._variables = variables
        return self._variables
예제 #10
0
    def compute(self, context, a, size=None, replace=True, p=None):
        """Draw random outcomes, supporting per-individual probabilities.

        When ``a`` is an LArray, the outcomes are the labels of its 'outcomes'
        axis and its values are used as probabilities (``p`` must then be
        None); any other axis is indexed through index_array_by_variables.

        When ``p`` is a non-empty list or array whose first element is not a
        scalar, one uniform number is drawn per individual and compared with
        the cumulative probabilities through nested Where expressions.
        Otherwise the call is delegated to NumpyRandom.compute.

        Raises ValueError when the probabilities do not sum to 1 (within
        tolerance) in the non-scalar case.
        """
        if isinstance(a, la.LArray):
            assert p is None
            outcomes_axis = a.axes['outcomes']
            outcomes = outcomes_axis.labels
            other_axes = a.axes - outcomes_axis

            if other_axes:
                a = index_array_by_variables(a, context, other_axes)
                # presumably moves the 'outcomes' axis first so that p[i] holds
                # the probabilities of outcome i -- TODO confirm
                p = np.asarray(a.transpose('outcomes'))
            else:
                p = np.asarray(a)
            # from here on, a holds the outcome labels and p the probabilities
            a = outcomes

        # non-scalar first element: one sequence of probabilities per outcome
        if isinstance(p,
                      (list, np.ndarray)) and len(p) and not np.isscalar(p[0]):
            assert len(p) == len(a)
            assert all(len(px) == size for px in p)
            assert len(a) >= 2

            if isinstance(p, list) and any(
                    isinstance(px, la.LArray) for px in p):
                p = [np.asarray(px) for px in p]
            ap = np.asarray(p)
            # cumulative probabilities along the first (outcomes) dimension
            cdf = ap.cumsum(axis=0)

            # copied & adapted from numpy/random/mtrand/mtrand.pyx
            atol = np.sqrt(np.finfo(np.float64).eps)
            if np.issubdtype(ap.dtype, np.floating):
                atol = max(atol, np.sqrt(np.finfo(ap.dtype).eps))

            # cdf[-1] is the total probability; it must be (close to) 1
            if np.any(np.abs(cdf[-1] - 1.) > atol):
                raise ValueError("probabilities do not sum to 1")

            # normalize so that the last cumulative value is exactly 1
            cdf /= cdf[-1]

            # I have not found a way to do this without an explicit loop as
            # np.digitize only supports a 1d array for bins. What we do is
            # worse than a linear "search" since we always evaluate all
            # possibilities (there is no shortcut when the value is found).
            # It might be faster to rewrite this using numba + np.digitize
            # for each individual (assuming it has a low setup overhead).

            # the goal is to build something like:
            # if(u < proba1, outcome1,
            #    if(u < proba2, outcome2,
            #       outcome3))

            data = {'u': np.random.uniform(size=size)}
            # the last outcome is the innermost "else" value
            expr = a[-1]
            # iterate in reverse and skip last
            pairs = zip(cdf[-2::-1], a[-2::-1])
            for i, (proba_x, outcome_x) in enumerate(pairs):
                data['p%d' % i] = proba_x
                # each iteration wraps the expression built so far
                expr = Where(
                    ComparisonOp('<', Variable(None, 'u'),
                                 Variable(None, 'p%d' % i)), outcome_x, expr)
            # evaluate in a cloned context given only 'u' and the 'p<i>' columns
            local_ctx = context.clone(fresh_data=True, entity_data=data)
            return expr.evaluate(local_ctx)
        else:
            return NumpyRandom.compute(self, context, a, size, replace, p)
예제 #11
0
파일: links.py 프로젝트: slee1009/liam2
 def traverse(self):
     """Yield the Variable for the link field of this link, then ``self``."""
     # XXX: don't we also need the fields within the target expression?
     link = self.link
     # noinspection PyProtectedMember
     link_field_variable = Variable(link._entity, link._link_field)
     yield link_field_variable
     yield self
예제 #12
0
파일: exprmisc.py 프로젝트: slee1009/liam2
    def compute(self, context, *args, **kwargs):
        """Build a PrettyTable with one column per expression.

        The expressions are the positional arguments or, when there is none,
        a Variable for each key of the context (``extra=False``). An 'id'
        column is inserted first when it is not already present.

        Keyword arguments (all optional):
            filter: when False, all columns are empty; when not None, it is
                used to index each non-0d array column.
            missing: passed as is to PrettyTable.
            header: whether the first row holds the expressions as strings
                (default True).
            limit: maximum number of rows kept (int).

        Raises ValueError when the first dimension of a column does not match
        the length of the id column.
        """
        filter_value = kwargs.pop('filter', None)
        missing = kwargs.pop('missing', None)
        # periods = kwargs.pop('periods', None)
        header = kwargs.pop('header', True)
        limit = kwargs.pop('limit', None)
        entity = context.entity

        if args:
            expressions = list(args)
        else:
            # extra=False because we don't want globals nor "system" variables
            # (nan, period, __xxx__)
            # FIXME: we should also somehow "traverse" expressions in this case
            # too (args is ()) => all keys in the current context
            expressions = [
                Variable(entity, name) for name in context.keys(extra=False)
            ]

        # the string form of the expressions is used both to find the id
        # column and as table header
        str_expressions = [str(e) for e in expressions]
        if 'id' not in str_expressions:
            str_expressions.insert(0, 'id')
            expressions.insert(0, Variable(entity, 'id'))
            id_pos = 0
        else:
            id_pos = str_expressions.index('id')

        #        if (self.periods is not None and len(self.periods) and
        #            'period' not in str_expressions):
        #            str_expressions.insert(0, 'period')
        #            expressions.insert(0, Variable('period'))
        #            id_pos += 1

        columns = []
        for expr in expressions:
            if filter_value is False:
                # nothing is selected: expressions are not even evaluated
                # dtype does not matter much
                expr_value = np.empty(0)
            else:
                # TODO: set filter before evaluating expressions
                expr_value = expr_eval(expr, context)
                # scalars and 0d arrays cannot be indexed by the filter
                if (filter_value is not None
                        and isinstance(expr_value, np.ndarray)
                        and expr_value.shape):
                    expr_value = expr_value[filter_value]
            columns.append(expr_value)

        # the id column determines the number of rows of the table
        ids = columns[id_pos]
        if isinstance(ids, np.ndarray) and ids.shape:
            numrows = len(ids)
        else:
            # FIXME: we need a test for this case (no idea how this can happen)
            numrows = 1

        # expand scalar columns to full columns in memory
        for idx, col in enumerate(columns):
            # dtype stays None for columns which are already non-0d arrays
            dtype = None
            if not isinstance(col, np.ndarray):
                dtype = type(col)
            elif not col.shape:
                dtype = col.dtype.type

            if dtype is not None:
                # TODO: try using itertools.repeat instead as it seems to be a
                # bit faster and would consume less memory (however, it might
                # not play very well with Pandas.to_csv)
                newcol = np.full(numrows, col, dtype=dtype)
                columns[idx] = newcol
            elif col.ndim > 1:
                # move last axis (should be id axis) first
                # np.moveaxis requires numpy >= 1.11
                # columns[idx] = np.moveaxis(col, -1, 0)
                columns[idx] = col.transpose((-1, ) +
                                             tuple(range(col.ndim - 1)))

        assert all(isinstance(col, np.ndarray) for col in columns)
        # shapes of the columns whose first dimension differs from numrows,
        # keyed by the string form of their expression
        bad_lengths = {
            str_expr: col.shape
            for col, str_expr in zip(columns, str_expressions)
            if col.shape[0] != numrows
        }
        if bad_lengths:
            raise ValueError(
                "first dimension of some columns are not the same length as the id column (%d): %s"
                % (numrows, str(bad_lengths)))

        if limit is not None:
            assert isinstance(limit, (int, long))
            # only the first `limit` rows of each column are kept
            columns = [col[:limit] for col in columns]

        # Transform to Python lists of normal Python types (ie no numpy types).
        # on py2, csv.writer uses repr(value) for float and str(value) for other but
        # on py3 since str(float) == repr(float), they switched to str(value) for everything
        # but str(np.float64) does not have full precision (truncated at the 12th decimal)
        # besides, this seems to be faster (but probably takes more memory).
        # Also on python2, converting produces nicer/shorter float strings (see issue #225).
        columns = [c.tolist() for c in columns]
        # transpose the columns into rows
        data = zip(*columns)
        if header:
            table = [str_expressions]
            table.extend(data)
        else:
            table = list(data)
        return PrettyTable(table, missing)