Example #1
0
    def plug_in_body(self, body=u"", conditions=None, cmv=None,
                     ids_path=None, subtree=True, attr=None):
        """Translate the model into a set of "if" statements in Tableau syntax

        The children of this node (as selected by `filter_nodes`) are
        visited recursively. Every leaf appends one
        `IF|ELSEIF <conditions joined by AND> THEN <value>` line to `body`.

        `body` is the text generated so far: the next clause starts with
        `IF` when it is empty and with `ELSEIF` otherwise.
        `conditions` is the list of condition strings gathered on the path
        to this node; it defaults to an empty list.
        `cmv` is a list of field names; the missing-value check for the
        split field is skipped when its name is already in the list.
        `ids_path` and `subtree` are forwarded to `filter_nodes`.
        `attr` selects the value printed at the leaves: the node output
        (through `value_to_print`) when None, otherwise the attribute of
        that name.

        Returns the updated body.

        """

        if cmv is None:
            cmv = []
        # normalise `conditions` unconditionally: the list is joined and
        # trimmed below even when a non-empty `body` has been received
        if conditions is None:
            conditions = []
        alternate = u"ELSEIF" if body else u"IF"

        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)
        if children:

            field = split(children)
            has_missing_branch = (missing_branch(children) or
                                  none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            one_branch = not has_missing_branch or \
                self.fields[field]['optype'] in COMPOSED_FIELDS
            if (one_branch and
                    self.fields[field]['name'] not in cmv):
                body += self.missing_check_code(field, alternate, cmv,
                                                conditions, attr=attr)
                alternate = u"ELSEIF"

            for child in children:
                pre_condition = u""
                post_condition = u""
                # wrap the child's condition when the split has a missing
                # branch and the child's predicate carries a value
                if has_missing_branch and child.predicate.value is not None:
                    pre_condition = self.missing_prefix_code(child, field, cmv)
                    post_condition = u")"

                child.split_condition_code(field, conditions,
                                           pre_condition, post_condition)

                # children work on copies, so siblings don't see each
                # other's changes to the lists
                body = child.plug_in_body(body, conditions[:], cmv=cmv[:],
                                          ids_path=ids_path, subtree=subtree,
                                          attr=attr)
                # drop the last condition before the next sibling
                # (presumably the one added by `split_condition_code` for
                # this child -- confirm against that method)
                del conditions[-1]
        else:
            # leaf: emit the clause for the accumulated path
            if attr is None:
                value = value_to_print(
                    self.output, self.fields[self.objective_id]['optype'])
            else:
                value = getattr(self, attr)
            body += u"%s %s THEN" % (alternate, " AND ".join(conditions))
            body += u" %s\n" % value

        return body
Example #2
0
    def plug_in_body(self, body=u"", conditions=None, cmv=None,
                     ids_path=None, subtree=True, attr=None):
        """Translate the model into a set of "if" statements in Tableau syntax

        The children of this node (as selected by `filter_nodes`) are
        visited recursively. Every leaf appends one
        `IF|ELSEIF <conditions joined by AND> THEN <value>` line to `body`.

        `body` is the text generated so far: the next clause starts with
        `IF` when it is empty and with `ELSEIF` otherwise.
        `conditions` is the list of condition strings gathered on the path
        to this node; it defaults to an empty list.
        `cmv` is a list of field names; the missing-value check for the
        split field is skipped when its name is already in the list.
        `ids_path` and `subtree` are forwarded to `filter_nodes`.
        `attr` selects the value printed at the leaves: the node output
        (through `value_to_print`) when None, otherwise the attribute of
        that name.

        Returns the updated body.

        """

        if cmv is None:
            cmv = []
        # normalise `conditions` unconditionally: the list is joined and
        # trimmed below even when a non-empty `body` has been received
        if conditions is None:
            conditions = []
        alternate = u"ELSEIF" if body else u"IF"

        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)
        if children:

            field = split(children)
            has_missing_branch = (missing_branch(children) or
                                  none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            one_branch = not has_missing_branch or \
                self.fields[field]['optype'] in COMPOSED_FIELDS
            if (one_branch and
                    self.fields[field]['name'] not in cmv):
                body += self.missing_check_code(field, alternate, cmv,
                                                conditions, attr=attr)
                alternate = u"ELSEIF"

            for child in children:
                pre_condition = u""
                post_condition = u""
                # wrap the child's condition when the split has a missing
                # branch and the child's predicate carries a value
                if has_missing_branch and child.predicate.value is not None:
                    pre_condition = self.missing_prefix_code(child, field, cmv)
                    post_condition = u")"

                child.split_condition_code(field, conditions,
                                           pre_condition, post_condition)

                # children work on copies, so siblings don't see each
                # other's changes to the lists
                body = child.plug_in_body(body, conditions[:], cmv=cmv[:],
                                          ids_path=ids_path, subtree=subtree,
                                          attr=attr)
                # drop the last condition before the next sibling
                # (presumably the one added by `split_condition_code` for
                # this child -- confirm against that method)
                del conditions[-1]
        else:
            # leaf: emit the clause for the accumulated path
            if attr is None:
                value = value_to_print(
                    self.output, self.fields[self.objective_id]['optype'])
            else:
                value = getattr(self, attr)
            body += u"%s %s THEN" % (alternate, " AND ".join(conditions))
            body += u" %s\n" % value

        return body
Example #3
0
    def generate_rules(self, depth=0, ids_path=None, subtree=True):
        """Render this node and its descendants as indented IF-THEN rules.

        One `IF <predicate> AND|THEN` line is written per child (as selected
        by `filter_nodes`), followed by the child's own rules one indentation
        level deeper. A node without children yields a single
        `<objective slug> = <output>` line; "Prediction" is used as the
        label when the node has no objective id.

        """
        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
        margin = INDENT * depth

        if not children:
            # terminal node: print the predicted value
            if self.objective_id:
                label = self.fields[self.objective_id]['slug']
            else:
                label = "Prediction"
            return u"%s %s = %s\n" % (margin, label, self.output)

        lines = []
        for child in children:
            condition = child.predicate.to_rule(self.fields, 'slug')
            # the rule continues with AND while the child has children
            connector = "AND" if child.children else "THEN"
            lines.append(u"%s IF %s %s\n" % (margin, condition, connector))
            lines.append(child.generate_rules(depth + 1,
                                              ids_path=ids_path,
                                              subtree=subtree))
        return u"".join(lines)
Example #4
0
    def generate_rules(self, depth=0, ids_path=None, subtree=True):
        """Render this node and its descendants as indented IF-THEN rules.

        One `IF <predicate> AND|THEN` line is written per child (as selected
        by `filter_nodes`), followed by the child's own rules one indentation
        level deeper. A node without children yields a single
        `<objective slug> = <output>` line; "Prediction" is used as the
        label when the node has no objective id.

        """
        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)
        margin = INDENT * depth

        if not children:
            # terminal node: print the predicted value
            if self.objective_id:
                label = self.fields[self.objective_id]['slug']
            else:
                label = "Prediction"
            return u"%s %s = %s\n" % (margin, label, self.output)

        lines = []
        for child in children:
            condition = child.predicate.to_rule(self.fields, 'slug')
            # the rule continues with AND while the child has children
            connector = "AND" if child.children else "THEN"
            lines.append(u"%s IF %s %s\n" % (margin, condition, connector))
            lines.append(child.generate_rules(depth + 1, ids_path=ids_path,
                                              subtree=subtree))
        return u"".join(lines)
Example #5
0
    def plug_in_body(self, depth=1, cmv=None, ids_path=None, subtree=True):
        """Build the javascript "if" statements for this node's subtree.

        `depth` is the indentation level of the generated code. `cmv` is a
        list of field names; the missing-value check for the split field is
        skipped when its camelCase name is already in the list. `ids_path`
        and `subtree` are forwarded to `filter_nodes`.

        Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`:
        the generated code plus the two lists filled in while generating
        the split conditions of this node and its descendants.

        """
        metric = "error" if self.regression else "confidence"
        if cmv is None:
            cmv = []
        objective = self.fields[self.objective_id]
        # fields are read through `data.` when there are too many of them
        prefix = u"data." if len(self.fields) > MAX_ARGS_LENGTH else u""
        term_fields = []
        item_fields = []

        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)
        margin = INDENT * depth

        if not children:
            # leaf: return the prediction together with its metric
            value = value_to_print(self.output, objective['optype'])
            leaf = u"%sreturn {prediction: %s, %s: %s};\n" % (
                margin, value, metric, self.confidence)
            return leaf, term_fields, item_fields

        # field used in the split
        split_field = split(children)
        has_missing_branch = missing_branch(children)
        # the missing is singled out as a special case only when there's
        # no missing branch in the children list
        needs_missing_check = (
            not has_missing_branch or
            self.fields[split_field]['optype'] in COMPOSED_FIELDS)

        chunks = []
        if (needs_missing_check and
                self.fields[split_field]['camelCase'] not in cmv):
            chunks.append(self.missing_check_code(split_field, depth, prefix,
                                                  cmv, metric))

        for child in children:
            child_field = child.predicate.field

            # code when missing_splits has been used
            if has_missing_branch and child.predicate.value is not None:
                pre_condition = self.missing_prefix_code(child, child_field,
                                                         prefix, cmv)
            else:
                pre_condition = u""

            # complete split condition code
            chunks.append(child.split_condition_code(
                child_field, depth, prefix, pre_condition,
                term_fields, item_fields))

            # value to be determined in next node
            sub_body, sub_terms, sub_items = child.plug_in_body(
                depth + 1, cmv=cmv[:], ids_path=ids_path, subtree=subtree)
            chunks.append(sub_body)
            chunks.append(u"%s}\n" % margin)
            term_fields.extend(sub_terms)
            item_fields.extend(sub_items)

        return u"".join(chunks), term_fields, item_fields
Example #6
0
    def plug_in_body(self,
                     depth=1,
                     cmv=None,
                     input_map=False,
                     ids_path=None,
                     subtree=True):
        """Build the python "if" statements for this node's subtree.

        `depth` is the indentation level of the generated code. `cmv` is a
        list of field names; the missing-value check for the split field is
        skipped when its slug is already in the list. `input_map` is passed
        through to the code-generating helpers. `ids_path` and `subtree` are
        forwarded to `filter_nodes`.

        Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`.

        """
        if cmv is None:
            cmv = []
        term_fields = []
        item_fields = []

        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
        if not children:
            # leaf: return a dict with the prediction and, when the node
            # has one, its probability
            value = value_to_print(self.output, "numeric")
            pieces = [u"%sreturn {\"prediction\":%s" % (INDENT * depth, value)]
            if hasattr(self, "probability"):
                pieces.append(u", \"probability\": %s" % self.probability)
            pieces.append(u"}\n")
            return u"".join(pieces), term_fields, item_fields

        # field used in the split
        split_field = split(children)
        has_missing_branch = (missing_branch(children)
                              or none_value(children))
        # the missing is singled out as a special case only when there's
        # no missing branch in the children list
        needs_missing_check = (
            not has_missing_branch or
            self.fields[split_field]['optype'] in COMPOSED_FIELDS)

        chunks = []
        if (needs_missing_check and
                self.fields[split_field]['slug'] not in cmv):
            chunks.append(self.missing_check_code(split_field, depth,
                                                  input_map, cmv))

        for child in children:
            child_field = child.predicate.field

            # code when missing_splits has been used
            if has_missing_branch and child.predicate.value is not None:
                pre_condition = self.missing_prefix_code(
                    child, child_field, input_map, cmv)
            else:
                pre_condition = u""

            # complete split condition code
            chunks.append(child.split_condition_code(
                child_field, depth, input_map, pre_condition,
                term_fields, item_fields))

            # value to be determined in next node
            sub_body, sub_terms, sub_items = child.plug_in_body(
                depth + 1,
                cmv=cmv[:],
                input_map=input_map,
                ids_path=ids_path,
                subtree=subtree)
            chunks.append(sub_body)
            term_fields.extend(sub_terms)
            item_fields.extend(sub_items)

        return u"".join(chunks), term_fields, item_fields
Example #7
0
    def plug_in_body(self,
                     depth=0,
                     cmv=None,
                     ids_path=None,
                     subtree=True,
                     body=u"",
                     attr=None):
        """Translate the model into a mysql function

        `depth` controls the size of indentation. As soon as a value is missing
        that node is returned without further evaluation.
        `attr` is used to decide the value returned by the function. When
        it's set to None, the prediction is returned. When set to the
        name of an attribute (e.g. 'confidence') this attribute is returned
        `cmv` is a list of field names; the missing-value check for the
        split field is skipped when its name is already in the list.
        `ids_path` and `subtree` are forwarded to `filter_nodes`.
        `body` is the text generated so far; the updated text is returned.

        """

        if cmv is None:
            cmv = []

        # `alternate` is the opening of the next `IF (` clause: once some
        # text exists it starts with a comma and a new indented line
        if body:
            alternate = u",\n%sIF (" % (depth * INDENT)
        else:
            alternate = u"IF ("
        # extra closing text appended after this node's children when a
        # missing check has been opened
        post_missing_body = u""

        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
        if children:

            # field used in the split
            field = split(children)

            has_missing_branch = (missing_branch(children)
                                  or none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            if (not has_missing_branch
                    and not self.fields[field]['name'] in cmv):
                body += self.missing_check_code(field, alternate, cmv, attr)
                # the following clauses go one level deeper and the missing
                # check needs one more closing parenthesis at the end
                depth += 1
                alternate = u",\n%sIF (" % (depth * INDENT)
                post_missing_body += u")"

            for child in children:
                pre_condition = u""
                # code when missing splits has been used
                if has_missing_branch and child.predicate.value is not None:
                    pre_condition = self.missing_prefix_code(child, field, cmv)

                # complete split condition code
                body += child.split_condition_code( \
                    field, alternate, pre_condition)

                # depth grows with every child, so each sibling's clause is
                # indented one level deeper than the previous one
                depth += 1
                alternate = u",\n%sIF (" % (depth * INDENT)
                # the child receives a copy of `cmv` and appends its own
                # code to the body built so far
                body = child.plug_in_body(depth,
                                          cmv=cmv[:],
                                          ids_path=ids_path,
                                          subtree=subtree,
                                          body=body,
                                          attr=attr)
            # NULL is the final alternative once every child clause has
            # been written
            body += u", NULL))" + post_missing_body
            # NOTE(review): this reset is not read again before returning
            post_missing_body = u""
        else:
            # leaf: append the value chosen through `attr`
            if attr is None:
                value = value_to_print( \
                    self.output, self.fields[self.objective_id]['optype'])
            else:
                value = getattr(self, attr)
            body += u", %s" % (value)

        return body
Example #8
0
    def plug_in_body(self,
                     ids_path=None,
                     subtree=True,
                     prefix=None,
                     metric=CONFIDENCE):
        """Translate the model into a set of functions, one per node, that
        contain only if statements and function calls

        `ids_path` and `subtree` are forwarded to `filter_nodes`.
        `prefix` is prepended to the name of every generated function
        (`<prefix>n_<node id>`); it defaults to an empty string.
        `metric` is the name of the node attribute returned next to the
        prediction; values not in CONFIDENCE_METRICS fall back to
        "confidence".

        Returns a tuple `(functions, term_analysis_fields,
        item_analysis_fields)` where `functions` is the list with the code
        of one function per selected node.

        """
        # label for the confidence measure and initialization
        metric = metric if metric in CONFIDENCE_METRICS else "confidence"

        if prefix is None:
            prefix = ""
        term_analysis_fields = []
        item_analysis_fields = []
        functions = []

        nodes = filter_nodes(self.nodes.values(),
                             ids=ids_path,
                             subtree=subtree)
        if nodes:

            for node in nodes:
                # every node starts a new function at indentation level 1
                depth = 1
                body = u"%sdef %sn_%s(data):\n" % (INDENT * depth, prefix,
                                                   node.id)
                depth += 1

                # `node.children` holds keys into `self.nodes`
                children = [self.nodes[key] for key in node.children]
                if children:

                    # field used in the split
                    field = children[0].predicate.field

                    has_missing_branch = (missing_branch(children)
                                          or none_value(children))
                    # the missing is singled out as a special case only when
                    # there's no missing branch in the children list
                    one_branch = not has_missing_branch or \
                        self.fields[field]['optype'] in COMPOSED_FIELDS
                    if one_branch:
                        body += self.missing_check_code( \
                            field, node, depth, metric)

                    # True only while the first child is being processed
                    condition = True

                    for child in children:

                        if condition:  # only first child has if condition
                            field = child.predicate.field
                            pre_condition = u""
                            # code when missing_splits has been used
                            if has_missing_branch and child.predicate.value \
                                    is not None:
                                pre_condition = self.missing_prefix_code( \
                                    child, field)

                            # complete split condition code
                            body += self.split_condition_code( \
                                field, child, depth, pre_condition,
                                term_analysis_fields, item_analysis_fields)

                        # body += next_level[0]
                        # the call to the child's function is written one
                        # level deeper; the net effect of +1 / -2 is that
                        # each following child's call is written one level
                        # shallower than the previous one
                        depth += 1
                        body += "%sreturn %sn_%s(data)\n" % \
                            (INDENT * depth, prefix, child.id)
                        depth -= 2
                        condition = False
                else:
                    # leaf: return the prediction and the chosen metric
                    value = value_to_print( \
                        node.output,
                        self.fields[self.objective_id]['optype'])
                    body += u"%sreturn {\"prediction\":%s, \"%s\":%s}\n" % ( \
                        INDENT * depth, value, metric, getattr(node, metric))
                    # NOTE(review): no effect, `depth` is reset at the top
                    # of the next iteration
                    depth -= 1

                functions.append(body)

        return functions, term_analysis_fields, item_analysis_fields
Example #9
0
    def tableau_body(self,
                     body=u"",
                     conditions=None,
                     cmv=None,
                     ids_path=None,
                     subtree=True):
        """Translate the model into a set of "if" statements in Tableau syntax

        The children of this node (as selected by `filter_nodes`) are
        visited recursively. Every leaf appends one
        `IF|ELSEIF <conditions joined by AND> THEN <value>` line to `body`.

        `body` is the text generated so far: the next clause starts with
        `IF` when it is empty and with `ELSEIF` otherwise.
        `conditions` is the list of condition strings gathered on the path
        to this node; it defaults to an empty list.
        `cmv` is the list of names of the fields whose missing values have
        already been checked.
        `ids_path` and `subtree` are forwarded to `filter_nodes`.

        Returns the updated body, or an empty string as soon as a child
        with a non-null predicate value splits on a 'text' or 'items'
        field.

        """

        if cmv is None:
            cmv = []
        # normalise `conditions` unconditionally: the list is appended to
        # and joined below even when a non-empty `body` has been received
        if conditions is None:
            conditions = []
        alternate = u"ELSEIF" if body else u"IF"

        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
        if children:
            field = split(children)
            has_missing_branch = (missing_branch(children)
                                  or none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            if (not has_missing_branch
                    and self.fields[field]['name'] not in cmv):
                conditions.append("ISNULL([%s])" % self.fields[field]['name'])
                body += (u"%s %s THEN " %
                         (alternate, " AND ".join(conditions)))
                if self.fields[self.objective_id]['optype'] == 'numeric':
                    value = self.output
                else:
                    value = tableau_string(self.output)
                body += (u"%s\n" % value)
                cmv.append(self.fields[field]['name'])
                alternate = u"ELSEIF"
                # the ISNULL condition only applies to the clause above
                del conditions[-1]

            for child in children:
                predicate = child.predicate
                pre_condition = u""
                post_condition = u""
                if has_missing_branch and predicate.value is not None:
                    negation = u"" if predicate.missing else u"NOT "
                    connection = u"OR" if predicate.missing else u"AND"
                    pre_condition = (
                        u"(%sISNULL([%s]) %s " %
                        (negation, self.fields[field]['name'], connection))
                    if not predicate.missing:
                        cmv.append(self.fields[field]['name'])
                    post_condition = u")"

                optype = self.fields[predicate.field]['optype']
                if predicate.value is None:
                    # predicate on the missing value itself: the whole
                    # condition is built from the missing operator
                    value = ""
                    operator = u""
                    pre_condition = T_MISSING_OPERATOR[predicate.operator]
                    post_condition = u")"
                elif optype in ('text', 'items'):
                    # no translation is produced for these field types
                    return u""
                else:
                    if optype == 'numeric':
                        value = predicate.value
                    else:
                        value = repr(predicate.value)
                    operator = PYTHON_OPERATOR[predicate.operator]

                conditions.append(
                    "%s[%s]%s%s%s" %
                    (pre_condition, self.fields[predicate.field]['name'],
                     operator, value, post_condition))
                # children work on copies, so siblings don't see each
                # other's changes to the lists
                body = child.tableau_body(body,
                                          conditions[:],
                                          cmv=cmv[:],
                                          ids_path=ids_path,
                                          subtree=subtree)
                # remove this child's condition before the next sibling
                del conditions[-1]
        else:
            # leaf: emit the clause for the accumulated path
            if self.fields[self.objective_id]['optype'] == 'numeric':
                value = self.output
            else:
                value = tableau_string(self.output)
            body += (u"%s %s THEN" % (alternate, " AND ".join(conditions)))
            body += u" %s\n" % value

        return body
Example #10
0
    def python_body(self,
                    depth=1,
                    cmv=None,
                    input_map=False,
                    ids_path=None,
                    subtree=True):
        """Build the python "if" statements for this node's subtree.

        `depth` is the indentation level of the generated code. `cmv` is the
        list of slugs of the fields whose missing values have already been
        checked. When `input_map` is set, fields are read from a `data` map
        instead of being referenced by their slug. `ids_path` and `subtree`
        are forwarded to `filter_nodes`.

        Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`
        where the two lists hold `(field, term)` pairs for the 'text' and
        'items' fields used in the split conditions.

        """
        def map_data(field, missing=False):
            """Returns the subject of the condition in map format when
               `input_map` is set, and the bare field name otherwise.
            """
            if not input_map:
                return field
            if missing:
                return "data.get('%s')" % field
            return "data['%s']" % field

        if cmv is None:
            cmv = []
        term_analysis_fields = []
        item_analysis_fields = []

        children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
        margin = INDENT * depth

        if not children:
            # leaf: return the node's output
            if self.fields[self.objective_id]['optype'] == 'numeric':
                value = self.output
            else:
                value = repr(self.output)
            return (u"%sreturn %s\n" % (margin, value),
                    term_analysis_fields, item_analysis_fields)

        body = u""
        split_field = split(children)
        has_missing_branch = (missing_branch(children)
                              or none_value(children))
        # the missing is singled out as a special case only when there's
        # no missing branch in the children list
        if not has_missing_branch:
            split_info = self.fields[split_field]
            if (split_info["optype"] not in ["text", "items"] and
                    split_info['slug'] not in cmv):
                body += (u"%sif (%s is None):\n" %
                         (margin, map_data(split_info['slug'], True)))
                if self.fields[self.objective_id]['optype'] == 'numeric':
                    value = self.output
                else:
                    value = repr(self.output)
                body += (u"%sreturn %s\n" % (INDENT * (depth + 1), value))
                cmv.append(split_info['slug'])

        for child in children:
            predicate = child.predicate
            field = predicate.field
            field_info = self.fields[field]

            pre_condition = u""
            if has_missing_branch and predicate.value is not None:
                if predicate.missing:
                    negation, connection = u"", u"or"
                else:
                    negation, connection = u" not", u"and"
                pre_condition = (u"%s is%s None %s " %
                                 (map_data(field_info['slug'], True),
                                  negation, connection))
                if not predicate.missing:
                    cmv.append(field_info['slug'])

            optype = field_info['optype']
            uses_matching = optype in ('text', 'items')
            # only values of the remaining field types are quoted
            if (optype == 'numeric' or uses_matching
                    or predicate.value is None):
                value = predicate.value
            else:
                value = repr(predicate.value)

            if uses_matching:
                term = predicate.term
                if optype == 'text':
                    term_analysis_fields.append((field, term))
                    matching_function = "term_matches"
                else:
                    item_analysis_fields.append((field, term))
                    matching_function = "item_matches"
                literal_prefix = 'u' if isinstance(term, unicode) else ''

                body += (
                    u"%sif (%s%s(%s, \"%s\", %s\"%s\") %s %s):"
                    u"\n" %
                    (margin, pre_condition, matching_function,
                     map_data(field_info['slug'], False),
                     field_info['slug'],
                     literal_prefix, term.replace("\"", "\\\""),
                     PYTHON_OPERATOR[predicate.operator], value))
            else:
                if predicate.value is None:
                    operator = MISSING_OPERATOR[predicate.operator]
                    cmv.append(field_info['slug'])
                else:
                    operator = PYTHON_OPERATOR[predicate.operator]
                body += (u"%sif (%s%s %s %s):\n" %
                         (margin, pre_condition,
                          map_data(field_info['slug'], False),
                          operator, value))

            # the child works on a copy of `cmv`
            sub_body, sub_terms, sub_items = child.python_body(
                depth + 1,
                cmv=cmv[:],
                input_map=input_map,
                ids_path=ids_path,
                subtree=subtree)
            body += sub_body
            term_analysis_fields.extend(sub_terms)
            item_analysis_fields.extend(sub_items)

        return body, term_analysis_fields, item_analysis_fields
Example #11
0
    def tableau_body(self, body=u"", conditions=None, cmv=None,
                     ids_path=None, subtree=True):
        """Translate the model into a set of "if" statements in Tableau syntax

        The children of this node (as selected by `filter_nodes`) are
        visited recursively. Every leaf appends one
        `IF|ELSEIF <conditions joined by AND> THEN <value>` line to `body`.

        `body` is the text generated so far: the next clause starts with
        `IF` when it is empty and with `ELSEIF` otherwise.
        `conditions` is the list of condition strings gathered on the path
        to this node; it defaults to an empty list.
        `cmv` is the list of names of the fields whose missing values have
        already been checked.
        `ids_path` and `subtree` are forwarded to `filter_nodes`.

        Returns the updated body, or an empty string as soon as a child
        with a non-null predicate value splits on a 'text' or 'items'
        field.

        """

        if cmv is None:
            cmv = []
        # normalise `conditions` unconditionally: the list is appended to
        # and joined below even when a non-empty `body` has been received
        if conditions is None:
            conditions = []
        alternate = u"ELSEIF" if body else u"IF"

        children = filter_nodes(self.children, ids=ids_path,
                                subtree=subtree)
        if children:
            field = split(children)
            has_missing_branch = (missing_branch(children) or
                                  none_value(children))
            # the missing is singled out as a special case only when there's
            # no missing branch in the children list
            if (not has_missing_branch and
                    self.fields[field]['name'] not in cmv):
                conditions.append("ISNULL([%s])" % self.fields[field]['name'])
                body += (u"%s %s THEN " %
                         (alternate, " AND ".join(conditions)))
                if self.fields[self.objective_id]['optype'] == 'numeric':
                    value = self.output
                else:
                    value = tableau_string(self.output)
                body += (u"%s\n" % value)
                cmv.append(self.fields[field]['name'])
                alternate = u"ELSEIF"
                # the ISNULL condition only applies to the clause above
                del conditions[-1]

            for child in children:
                predicate = child.predicate
                pre_condition = u""
                post_condition = u""
                if has_missing_branch and predicate.value is not None:
                    negation = u"" if predicate.missing else u"NOT "
                    connection = u"OR" if predicate.missing else u"AND"
                    pre_condition = (
                        u"(%sISNULL([%s]) %s " % (
                            negation, self.fields[field]['name'], connection))
                    if not predicate.missing:
                        cmv.append(self.fields[field]['name'])
                    post_condition = u")"

                optype = self.fields[predicate.field]['optype']
                if predicate.value is None:
                    # predicate on the missing value itself: the whole
                    # condition is built from the missing operator
                    value = ""
                    operator = u""
                    pre_condition = T_MISSING_OPERATOR[predicate.operator]
                    post_condition = u")"
                elif optype in ('text', 'items'):
                    # no translation is produced for these field types
                    return u""
                else:
                    if optype == 'numeric':
                        value = predicate.value
                    else:
                        value = repr(predicate.value)
                    operator = PYTHON_OPERATOR[predicate.operator]

                conditions.append("%s[%s]%s%s%s" % (
                    pre_condition,
                    self.fields[predicate.field]['name'],
                    operator,
                    value,
                    post_condition))
                # children work on copies, so siblings don't see each
                # other's changes to the lists
                body = child.tableau_body(body, conditions[:], cmv=cmv[:],
                                          ids_path=ids_path, subtree=subtree)
                # remove this child's condition before the next sibling
                del conditions[-1]
        else:
            # leaf: emit the clause for the accumulated path
            if self.fields[self.objective_id]['optype'] == 'numeric':
                value = self.output
            else:
                value = tableau_string(self.output)
            body += (
                u"%s %s THEN" % (alternate, " AND ".join(conditions)))
            body += u" %s\n" % value

        return body
Example #12
0
    def python_body(self, depth=1, cmv=None, input_map=False,
                    ids_path=None, subtree=True):
        """Render this node and its descendants as nested python "if" code.

        `depth` is the number of INDENT units prepended to the statements
        generated at this level. `cmv` holds the slugs of the fields whose
        missing-value handling has already been generated; each child
        receives its own copy of the list. When `input_map` is set, fields
        are read from a `data` dictionary instead of being referenced as
        plain variables. `ids_path` and `subtree` are handed to
        `filter_nodes` to choose the children to be visited.

        Returns a tuple: the generated code, the (field, term) pairs found
        in text predicates and the (field, term) pairs found in items
        predicates.

        """

        def subject(name, nullable=False):
            """Expression used to read a field: a lookup in `data` when
               `input_map` is on, the bare name otherwise.
            """
            if not input_map:
                return name
            if nullable:
                return "data.get('%s')" % name
            return "data['%s']" % name

        def node_output():
            """Literal for this node's output: numeric objectives are used
               as they are, anything else goes through `repr`.
            """
            if self.fields[self.objective_id]['optype'] == 'numeric':
                return self.output
            return repr(self.output)

        cmv = [] if cmv is None else cmv
        term_analysis_fields = []
        item_analysis_fields = []
        nodes = filter_nodes(self.children, ids=ids_path,
                             subtree=subtree)

        if not nodes:
            # leaf: the only statement is the return of the node's output
            body = u"%sreturn %s\n" % (INDENT * depth, node_output())
            return body, term_analysis_fields, item_analysis_fields

        body = u""
        indent = INDENT * depth
        field = split(nodes)
        has_missing_branch = (missing_branch(nodes) or
                              none_value(nodes))
        # an explicit "is None" guard is only emitted when no child deals
        # with missings, the field is not text/items and the guard has not
        # been generated before
        if (not has_missing_branch and
                self.fields[field]["optype"] not in ["text", "items"] and
                self.fields[field]['slug'] not in cmv):
            split_slug = self.fields[field]['slug']
            body += u"%sif (%s is None):\n" % (indent,
                                               subject(split_slug, True))
            body += u"%sreturn %s\n" % (INDENT * (depth + 1),
                                        node_output())
            cmv.append(split_slug)

        for node in nodes:
            predicate = node.predicate
            field = predicate.field
            slug = self.fields[field]['slug']
            optype = self.fields[field]['optype']

            # prefix that combines the predicate with the missing test
            pre_condition = u""
            if has_missing_branch and predicate.value is not None:
                if predicate.missing:
                    negation, connection = u"", u"or"
                else:
                    negation, connection = u" not", u"and"
                    cmv.append(slug)
                pre_condition = u"%s is%s None %s " % (
                    subject(slug, True), negation, connection)

            # only categorical-like values need quoting
            if (optype in ('numeric', 'text', 'items') or
                    predicate.value is None):
                value = predicate.value
            else:
                value = repr(predicate.value)

            if optype == 'text' or optype == 'items':
                if optype == 'text':
                    term_analysis_fields.append((field, predicate.term))
                    matching_function = "term_matches"
                else:
                    item_analysis_fields.append((field, predicate.term))
                    matching_function = "item_matches"
                unicode_mark = ('u' if isinstance(predicate.term, unicode)
                                else '')
                escaped_term = predicate.term.replace("\"", "\\\"")
                body += (
                    u"%sif (%s%s(%s, \"%s\", %s\"%s\") %s %s):\n" %
                    (indent, pre_condition, matching_function,
                     subject(slug, False), slug, unicode_mark,
                     escaped_term,
                     PYTHON_OPERATOR[predicate.operator],
                     value))
            else:
                if predicate.value is None:
                    comparison = MISSING_OPERATOR[predicate.operator]
                    cmv.append(slug)
                else:
                    comparison = PYTHON_OPERATOR[predicate.operator]
                body += u"%sif (%s%s %s %s):\n" % (
                    indent, pre_condition, subject(slug, False),
                    comparison, value)

            # code for the subtree hanging from this child
            nested = node.python_body(depth + 1, cmv=cmv[:],
                                      input_map=input_map,
                                      ids_path=ids_path,
                                      subtree=subtree)
            body += nested[0]
            term_analysis_fields.extend(nested[1])
            item_analysis_fields.extend(nested[2])

        return body, term_analysis_fields, item_analysis_fields
Example #13
0
    def plug_in_body(self, depth=0, cmv=None,
                     ids_path=None, subtree=True, body=u"", attr=None):
        """Append to `body` the nested mysql "IF (" expressions of this node

        `depth` sets how many INDENT units precede each nested IF; it grows
        by one after the missing check and after every child condition.
        `cmv` lists the names of the fields whose missing check has already
        been written; each child receives a copy.
        `attr` selects what a leaf contributes: the printed output when
        it's None, or the node attribute with that name (e.g. 'confidence')
        otherwise.
        `ids_path` and `subtree` are handed to `filter_nodes` to choose the
        children to be visited.

        """

        def nested_if(level):
            """Opening of an IF placed on a new line at the given level"""
            return u",\n%sIF (" % (level * INDENT)

        cmv = [] if cmv is None else cmv
        nodes = filter_nodes(self.children, ids=ids_path,
                             subtree=subtree)

        if not nodes:
            # leaf: only the value is added
            if attr is None:
                value = value_to_print(
                    self.output, self.fields[self.objective_id]['optype'])
            else:
                value = getattr(self, attr)
            return body + u", %s" % value

        # the first IF is written inline; later ones start a new line
        alternate = nested_if(depth) if body else u"IF ("
        closing = u", NULL))"

        # field used in the split
        field = split(nodes)
        has_missing_branch = (missing_branch(nodes) or
                              none_value(nodes))
        # the missing check is written separately only when no child
        # handles missings and the field has not been checked before
        if not (has_missing_branch or self.fields[field]['name'] in cmv):
            body += self.missing_check_code(field, alternate, cmv, attr)
            depth += 1
            alternate = nested_if(depth)
            # the extra IF opened by the missing check needs its own ")"
            closing += u")"

        for node in nodes:
            # prefix needed when missing splits have been used
            if has_missing_branch and node.predicate.value is not None:
                pre_condition = self.missing_prefix_code(node, field, cmv)
            else:
                pre_condition = u""

            body += node.split_condition_code(
                field, alternate, pre_condition)

            # whatever comes next is nested one level deeper
            depth += 1
            alternate = nested_if(depth)
            body = node.plug_in_body(depth, cmv=cmv[:],
                                     ids_path=ids_path, subtree=subtree,
                                     body=body, attr=attr)

        return body + closing
Example #14
0
    def plug_in_body(self, depth=1, cmv=None, ids_path=None, subtree=True):
        """Render this node and its descendants as javascript "if" blocks.

        `depth` is the number of INDENT units used at this level. `cmv`
        lists the camelCase names of the fields whose missing check has
        already been written; each child receives a copy. `ids_path` and
        `subtree` are handed to `filter_nodes` to choose the children to
        be visited.

        Returns a tuple: the generated code plus the two lists that
        `split_condition_code` fills with term-analysis and item-analysis
        fields, extended with the ones reported by the children.

        """
        if self.regression:
            metric = "error"
        else:
            metric = "confidence"
        cmv = [] if cmv is None else cmv
        term_analysis_fields = []
        item_analysis_fields = []
        objective = self.fields[self.objective_id]
        # fields are read from the `data` object when there are too many
        # of them
        prefix = u"data." if len(self.fields) > MAX_ARGS_LENGTH else u""
        nodes = filter_nodes(self.children, ids=ids_path, subtree=subtree)

        if not nodes:
            # leaf: return the prediction together with its metric
            value = value_to_print(self.output, objective['optype'])
            body = u"%sreturn {prediction: %s, %s: %s};\n" % (
                INDENT * depth, value, metric, self.confidence)
            return body, term_analysis_fields, item_analysis_fields

        body = u""
        closing_brace = u"%s}\n" % (INDENT * depth)

        # field used in the split
        field = split(nodes)
        split_field = self.fields[field]

        has_missing_branch = missing_branch(nodes)
        # the missing check is written separately when no child handles
        # missings or when the field is a composed one
        one_branch = (not has_missing_branch or
                      split_field['optype'] in COMPOSED_FIELDS)
        if one_branch and split_field['camelCase'] not in cmv:
            body += self.missing_check_code(field, depth, prefix, cmv,
                                            metric)

        for node in nodes:
            field = node.predicate.field

            # prefix needed when missing_splits has been used
            if has_missing_branch and node.predicate.value is not None:
                pre_condition = self.missing_prefix_code(
                    node, field, prefix, cmv)
            else:
                pre_condition = u""

            # opening of the block for this child's condition
            body += node.split_condition_code(
                field, depth, prefix, pre_condition,
                term_analysis_fields, item_analysis_fields)

            # contents of the block come from the child's subtree
            nested = node.plug_in_body(depth + 1,
                                       cmv=cmv[:],
                                       ids_path=ids_path,
                                       subtree=subtree)
            body += nested[0] + closing_brace
            term_analysis_fields.extend(nested[1])
            item_analysis_fields.extend(nested[2])

        return body, term_analysis_fields, item_analysis_fields
Example #15
0
    def plug_in_body(self, depth=1, cmv=None, input_map=False,
                     ids_path=None, subtree=True):
        """Render this node and its descendants as nested python "if" code.

        `depth` is the number of INDENT units used at this level. `cmv`
        lists the slugs of the fields whose missing check has already been
        written; each child receives a copy. `input_map` is forwarded to
        the helpers that write the conditions. `ids_path` and `subtree`
        are handed to `filter_nodes` to choose the children to be visited.

        Returns a tuple: the generated code plus the two lists that
        `split_condition_code` fills with term-analysis and item-analysis
        fields, extended with the ones reported by the children.

        """
        cmv = [] if cmv is None else cmv
        term_analysis_fields = []
        item_analysis_fields = []

        nodes = filter_nodes(self.children, ids=ids_path,
                             subtree=subtree)

        if not nodes:
            # leaf: return a dictionary with the prediction and, when the
            # node has one, its probability
            value = value_to_print(self.output, "numeric")
            pieces = [u"%sreturn {\"prediction\":%s" % (INDENT * depth,
                                                        value)]
            if hasattr(self, "probability"):
                pieces.append(u", \"probability\": %s" % self.probability)
            pieces.append(u"}\n")
            return (u"".join(pieces), term_analysis_fields,
                    item_analysis_fields)

        body = u""

        # field used in the split
        field = split(nodes)
        split_field = self.fields[field]

        has_missing_branch = (missing_branch(nodes) or
                              none_value(nodes))
        # the missing check is written separately when no child handles
        # missings or when the field is a composed one
        one_branch = (not has_missing_branch or
                      split_field['optype'] in COMPOSED_FIELDS)
        if one_branch and split_field['slug'] not in cmv:
            body += self.missing_check_code(field, depth, input_map, cmv)

        for node in nodes:
            field = node.predicate.field

            # prefix needed when missing_splits has been used
            if has_missing_branch and node.predicate.value is not None:
                pre_condition = self.missing_prefix_code(node, field,
                                                         input_map, cmv)
            else:
                pre_condition = u""

            # condition that leads to this child
            body += node.split_condition_code(
                field, depth, input_map, pre_condition,
                term_analysis_fields, item_analysis_fields)

            # what is returned under that condition comes from the child
            nested = node.plug_in_body(depth + 1,
                                       cmv=cmv[:],
                                       input_map=input_map,
                                       ids_path=ids_path,
                                       subtree=subtree)
            body += nested[0]
            term_analysis_fields.extend(nested[1])
            item_analysis_fields.extend(nested[2])

        return body, term_analysis_fields, item_analysis_fields