Esempio n. 1
0
    def plug_in(self,
                out=sys.stdout,
                hadoop=False,
                filter_id=None,
                subtree=True):
        """Writes a basic JavaScript implementation of the local predictions.

        `out` is the file-like object that receives the JavaScript code
        (`sys.stdout` by default). The `hadoop`, `filter_id` and `subtree`
        arguments are accepted but not used in this method.

        As a side effect, the names computed by `to_camel_js` are stored in
        the field dictionaries: 'CamelCase' for the objective field and
        'camelCase' for every field yielded by `sort_fields`.

        """
        # store the JavaScript names in the fields structure
        tree_fields = self.tree.fields
        objective_field = tree_fields[self.tree.objective_id]
        objective_field['CamelCase'] = to_camel_js(
            unidecode(objective_field['name']), False)
        for field_id, _ in sort_fields(tree_fields):
            field_obj = tree_fields[field_id]
            field_obj['camelCase'] = to_camel_js(unidecode(field_obj['name']))

        body, term_analysis_predicates, item_analysis_predicates = \
            self.tree.plug_in_body()
        # the helper sections are only generated when the tree body
        # reports predicates of that kind
        terms_body = (self.js_term_analysis_body(term_analysis_predicates)
                      if term_analysis_predicates else "")
        items_body = (self.js_item_analysis_body(item_analysis_predicates)
                      if item_analysis_predicates else "")

        output = self.js_pre_body()
        output += terms_body + items_body + body
        # final `return null;` and closing brace of the generated code
        output += u"%sreturn null;\n}\n" % INDENT
        if not PY3:
            # when PY3 is false the text is UTF-8 encoded before writing
            output = output.encode("utf8")
        out.write(output)
        out.flush()
Esempio n. 2
0
    def plug_in(self, out=sys.stdout, hadoop=False,
                filter_id=None, subtree=True):
        """Writes the JavaScript code that implements the local predictions.

        `out` is the file-like object where the JavaScript code is written
        (`sys.stdout` by default). `hadoop`, `filter_id` and `subtree` are
        part of the signature but are not read by this method.

        The field dictionaries are updated in place: the objective field
        gets a 'CamelCase' entry and every field yielded by `sort_fields`
        gets a 'camelCase' entry, both computed by `to_camel_js`.

        """
        # objective name, built with the `False` flag of to_camel_js
        objective = self.tree.fields[self.tree.objective_id]
        objective['CamelCase'] = to_camel_js(unidecode(objective['name']),
                                             False)
        # argument-style name for each of the fields
        for key, _ in sort_fields(self.tree.fields):
            current = self.tree.fields[key]
            current['camelCase'] = to_camel_js(unidecode(current['name']))

        body, term_analysis_predicates, item_analysis_predicates = \
            self.tree.plug_in_body()
        terms_body = ""
        if term_analysis_predicates:
            terms_body = self.js_term_analysis_body(term_analysis_predicates)
        items_body = ""
        if item_analysis_predicates:
            items_body = self.js_item_analysis_body(item_analysis_predicates)

        # pre-body first, then the optional helpers, then the tree rules
        output = self.js_pre_body()
        output += terms_body + items_body + body
        output += u"%sreturn null;\n}\n" % INDENT
        if not PY3:
            # when PY3 is false the text is UTF-8 encoded before writing
            output = output.encode("utf8")
        out.write(output)
        out.flush()
Esempio n. 3
0
    def js_signature(self, input_map=False):
        """Returns the JavaScript signature for a prediction method.

        The signature has the form `function predict<Objective>(<args>)`.
        When `input_map` is true or the number of fields is above
        MAX_ARGS_LENGTH the only argument is `data`; otherwise there is one
        argument per non-objective field, in the order set by `sort_fields`.

        Missing 'CamelCase' (objective field) and 'camelCase' (rest of
        fields) names are computed and stored in the field dictionaries.

        """
        objective_field = self.tree.fields[self.tree.objective_id]
        if 'CamelCase' not in objective_field:
            objective_field['CamelCase'] = to_camel_js(
                unidecode(objective_field['name']), False)

        output = u"function predict%s(" % objective_field['CamelCase']

        args = []
        if len(self.tree.fields) > MAX_ARGS_LENGTH or input_map:
            # a single map argument replaces the per-field arguments
            args.append("data")
        else:
            for field_id, _ in sort_fields(self.tree.fields):
                field_obj = self.tree.fields[field_id]
                if 'camelCase' not in field_obj:
                    field_obj['camelCase'] = to_camel_js(
                        unidecode(field_obj['name']))
                # the objective field is never an argument
                if field_id != self.tree.objective_id:
                    args.append(u"%s" % field_obj['camelCase'])

        return output + (u", ".join(args) + u")")
Esempio n. 4
0
    def js_signature(self, input_map=False):
        """Builds the JavaScript signature of the prediction function.

        Returns the text `function predict<Objective>(<args>)`. The
        arguments are the single name `data` when `input_map` is true or
        there are more than MAX_ARGS_LENGTH fields, and the 'camelCase'
        names of the non-objective fields (in `sort_fields` order)
        otherwise.

        The 'CamelCase' name of the objective field and the 'camelCase'
        names of the visited fields are stored in the field dictionaries
        when they are not there yet.

        """
        fields = self.tree.fields
        objective_id = self.tree.objective_id
        objective_field = fields[objective_id]
        if 'CamelCase' not in objective_field:
            camelcase = to_camel_js(unidecode(objective_field['name']), False)
            objective_field['CamelCase'] = camelcase

        output = u"function predict%s(" % objective_field['CamelCase']

        use_data_map = len(fields) > MAX_ARGS_LENGTH or input_map
        args = []
        if use_data_map:
            args.append("data")
        else:
            for key, _ in sort_fields(fields):
                field_obj = fields[key]
                if 'camelCase' not in field_obj:
                    field_obj['camelCase'] = to_camel_js(
                        unidecode(field_obj['name']))
                if key == objective_id:
                    # the objective is the output, not an input argument
                    continue
                args.append(u"%s" % field_obj['camelCase'])
        args_string = u", ".join(args)
        output += args_string + u")"

        return output
Esempio n. 5
0
def list_fields(model, out=sys.stdout):
    """Writes a description of the fields of the model to `out`.

    The objective field comes first, as `<name : optype>`, followed by one
    `[name : optype]` line for each of the other fields in the order set by
    `sort_fields`. The stream is flushed after every line.

    Returns `model.fields`.

    """
    header = '<%-32s : %s>\n' % (model.fields[model.objective_id]['name'],
                                 model.fields[model.objective_id]['optype'])
    out.write(utf8(header))
    out.flush()

    # the descriptions are collected before any of them is written
    descriptions = [(val['name'], val['optype'])
                    for key, val in sort_fields(model.fields)
                    if key != model.objective_id]
    for name, optype in descriptions:
        out.write(utf8('[%-32s : %s]\n' % (name, optype)))
        out.flush()
    return model.fields
Esempio n. 6
0
    def plug_in(self, out=sys.stdout, filter_id=None, subtree=True):
        """Writes an R function that implements the model.

        `out` is the file-like object that receives the R code
        (`sys.stdout` by default). `filter_id` and `subtree` are accepted
        but not used in this method.

        The generated function is named `predict<Objective>` and has one
        argument, defaulting to NA, per field yielded by `sort_fields`.
        The text of `self.docstring()` precedes it as R comments.

        As a side effect, 'CamelCase' is stored in the objective field and
        'dotted' in every field dictionary.

        """
        # names used in the R code: camel case for the function name,
        # the result of `dot` for the arguments
        objective_field = self.tree.fields[self.tree.objective_id]
        camelcase = to_camel_js(objective_field['name'], False)
        objective_field['CamelCase'] = camelcase
        default = "NA"
        args = []
        for field_id, _ in sort_fields(self.tree.fields):
            field_obj = self.tree.fields[field_id]
            field_obj['dotted'] = dot(field_obj['name'])
            args.append("%s=%s" % (field_obj['dotted'], default))

        body, term_analysis_predicates, item_analysis_predicates = \
            self.tree.plug_in_body()
        terms_body = (self.r_term_analysis_body(term_analysis_predicates)
                      if term_analysis_predicates else "")
        items_body = (self.r_item_analysis_body(item_analysis_predicates)
                      if item_analysis_predicates else "")

        predictor_definition = u"predict%s <- function" % camelcase
        # arguments after the first one are aligned with the first one
        depth = len(predictor_definition) + 1
        args_separator = ",\n" + " " * depth
        predictor = u"%s(%s){\n" % (predictor_definition,
                                    args_separator.join(args))
        # every line break of the docstring starts a new comment line
        docstring = self.docstring().replace("\n", "\n#")
        predictor_doc = u"# " + docstring + u"\n" + u"#\n"

        output = predictor_doc + predictor
        output += terms_body + items_body + body
        output += u"%sreturn(NA)\n}\n" % INDENT
        if not PY3:
            # when PY3 is false the text is UTF-8 encoded before writing
            output = output.encode("utf8")
        out.write(output)
        out.flush()
Esempio n. 7
0
    def plug_in(self, out=sys.stdout, filter_id=None, subtree=True):
        """Generates the R function that implements the model.

        The R code is written to `out`, a file-like object (`sys.stdout`
        by default). `filter_id` and `subtree` are part of the signature
        but are not read by this method.

        The output starts with the text of `self.docstring()` as R
        comments, followed by the `predict<Objective>` function, whose
        arguments are the fields yielded by `sort_fields`, all of them
        defaulting to NA.

        The field dictionaries are updated in place: 'CamelCase' is set in
        the objective field and 'dotted' in each field.

        """
        fields = self.tree.fields
        objective_field = fields[self.tree.objective_id]
        camelcase = to_camel_js(objective_field['name'], False)
        objective_field['CamelCase'] = camelcase
        default = "NA"
        args = []
        for key, _ in sort_fields(fields):
            dotted_name = dot(fields[key]['name'])
            fields[key]['dotted'] = dotted_name
            args.append("%s=%s" % (dotted_name, default))

        body, term_analysis_predicates, item_analysis_predicates = \
            self.tree.plug_in_body()
        terms_body = ""
        if term_analysis_predicates:
            terms_body = self.r_term_analysis_body(term_analysis_predicates)
        items_body = ""
        if item_analysis_predicates:
            items_body = self.r_item_analysis_body(item_analysis_predicates)

        predictor_definition = u"predict%s <- function" % camelcase
        # one argument per line, indented up to the opening parenthesis
        depth = len(predictor_definition) + 1
        predictor = u"%s(%s){\n" % (predictor_definition,
                                    (",\n" + " " * depth).join(args))
        # turn the docstring lines into R comment lines
        doc_lines = self.docstring().split("\n")
        predictor_doc = u"# " + "\n#".join(doc_lines) + u"\n" + u"#\n"

        output = predictor_doc + predictor
        output += terms_body + items_body + body
        output += u"%sreturn(NA)\n}\n" % INDENT
        if not PY3:
            # when PY3 is false the text is UTF-8 encoded before writing
            output = output.encode("utf8")
        out.write(output)
        out.flush()
Esempio n. 8
0
    def tree_rules(tree,
                   offsets,
                   objective_id,
                   fields,
                   out,
                   ids_path=None,
                   subtree=True):
        """Prints out an IF-THEN rule version of the tree.

        Every field in `fields` gets a 'slug' entry, computed by `slugify`
        from its name. The text returned by `generate_rules` is then
        passed through `utf8`, written to `out`, and the stream is flushed.

        """
        # the 'slug' entries are stored before the rules are generated
        for field in sort_fields(fields):
            field_info = fields[field[0]]
            field_info.update(slug=slugify(field_info['name']))

        rules = generate_rules(tree,
                               offsets,
                               objective_id,
                               fields,
                               ids_path=ids_path,
                               subtree=subtree)
        out.write(utf8(rules))
        out.flush()
Esempio n. 9
0
    def mysql(self, out, ids_path=None, subtree=True, attr=None):
        """Writes a MySQL function that implements the model.

        The `CREATE FUNCTION predict_<name> (...)` header is written to
        `out`, followed by the text returned by `self.tree.plug_in_body`,
        which receives `ids_path`, `subtree` and `attr`.

        The arguments of the function are the non-objective fields, typed
        as NUMERIC when their optype is 'numeric' and as VARCHAR(250)
        otherwise. The same rule sets the return type, unless `attr` is
        given: then `_<attr>` is appended to the function name and the
        return type is NUMERIC.

        Returns the length of the body text.

        """
        args = []
        for field_id, field in sort_fields(self.fields):
            field_name_to_show = self.fields[field_id]['name'].strip()
            if field['optype'] == 'numeric':
                field_type = 'NUMERIC'
            else:
                field_type = 'VARCHAR(250)'
            # fields with a blank name are shown by their id
            if field_name_to_show == "":
                field_name_to_show = field_id
            if field_id != self.objective_id:
                args.append("`%s` %s" % (field_name_to_show, field_type))

        objective = self.fields[self.objective_id]
        function_name = objective['name']
        if objective['optype'] == 'numeric':
            return_type = 'NUMERIC'
        else:
            return_type = 'VARCHAR(250)'
        if function_name == "":
            function_name = "field_" + self.objective_id
        # when the output is a confidence metric (error/confidence)
        if attr is not None:
            function_name += "_%s" % attr
            return_type = 'NUMERIC'

        definition = ("CREATE FUNCTION predict_%s (%s)"
                      "\nRETURNS %s DETERMINISTIC\nRETURN ") % (
                          function_name, ", ".join(args), return_type)
        out.write(definition)
        body = self.tree.plug_in_body(ids_path=ids_path, subtree=subtree,
                                      attr=attr)

        out.write(body)
        out.flush()
        return len(body)
Esempio n. 10
0
    def mysql(self, out, ids_path=None, subtree=True, attr=None):
        """Generates the MySQL function that implements the model.

        Two pieces of text are written to `out`: the
        `CREATE FUNCTION predict_<name> (...)` header and the body
        returned by `self.tree.plug_in_body` (called with `ids_path`,
        `subtree` and `attr`). The length of the body is returned.

        Each non-objective field becomes a backquoted argument whose type
        is NUMERIC for the 'numeric' optype and VARCHAR(250) for any
        other. The return type is chosen the same way from the objective
        field. When `attr` is not None, `_<attr>` is added to the function
        name and the return type becomes NUMERIC.

        """
        numeric_type = 'NUMERIC'
        text_type = 'VARCHAR(250)'

        args = []
        parameters = sort_fields(self.fields)
        for field_id, field in parameters:
            field_name_to_show = self.fields[field_id]['name'].strip()
            field_type = numeric_type if field['optype'] == 'numeric' \
                else text_type
            if field_name_to_show == "":
                # blank names are replaced by the field id
                field_name_to_show = field_id
            if field_id == self.objective_id:
                # the objective is the output, not an argument
                continue
            args.append("`%s` %s" % (field_name_to_show, field_type))

        objective = self.fields[self.objective_id]
        function_name = objective['name']
        return_type = numeric_type if objective['optype'] == 'numeric' \
            else text_type
        if function_name == "":
            function_name = "field_" + self.objective_id
        # when the output is a confidence metric (error/confidence)
        if attr is not None:
            function_name += "_%s" % attr
            return_type = numeric_type

        template = "CREATE FUNCTION predict_%s (%s)" \
                   "\nRETURNS %s DETERMINISTIC\nRETURN "
        out.write(template % (function_name, ", ".join(args), return_type))
        body = self.tree.plug_in_body(ids_path=ids_path,
                                      subtree=subtree,
                                      attr=attr)

        out.write(body)
        out.flush()
        return len(body)
Esempio n. 11
0
    def python(self, out, docstring, ids_path=None, subtree=True):
        """Generates a python function that implements the model.

        The text written to `out` contains two functions:
        `predict_<name>`, whose body is the code returned by
        `self.tree.plug_in_body`, and `predict`, which calls the former
        and, when `self.boosting` is set, adds its "weight" (and its
        "objective_class" as "class", if any) to the prediction.

        `docstring` is the text placed in the docstring of the generated
        `predict_<name>` function. `ids_path` and `subtree` are handed
        over to `self.tree.plug_in_body`.

        When MAX_ARGS_LENGTH is positive and the number of fields is above
        it, the generated functions receive a single `data` argument
        instead of one keyword argument per non-objective field.

        As a side effect, a 'slug' entry is stored in every field of
        `self.fields`.

        """

        args = []
        args_tree = []
        parameters = sort_fields(self.fields)
        input_map = len(parameters) > MAX_ARGS_LENGTH and MAX_ARGS_LENGTH > 0
        # keywords and prefix are only handed to slugify when the fields
        # are used as argument names
        reserved_keywords = PYTHON_KEYWORDS if not input_map else None
        prefix = "_" if not input_map else ""
        for field_id, _ in parameters:
            field_name_to_show = self.fields[field_id]['name'].strip()
            if field_name_to_show == "":
                field_name_to_show = field_id
            slug = slugify(field_name_to_show,
                           reserved_keywords=reserved_keywords,
                           prefix=prefix)
            self.fields[field_id].update(slug=slug)
            if not input_map and field_id != self.objective_id:
                args.append("%s=None" % slug)
                args_tree.append("%s=%s" % (slug, slug))
        if input_map:
            args.append("data={}")
            args_tree.append("data=data")

        function_name = self.fields[self.objective_id]['slug'] if \
            not self.boosting else \
            self.fields[self.boosting["objective_field"]]['slug']
        # startswith() rather than [0]: indexing an empty slug would raise
        # IndexError and the empty-name fallback below could not be reached
        if prefix == "_" and function_name.startswith(prefix):
            function_name = function_name[1:]
        if function_name == "":
            function_name = "field_" + self.objective_id
        python_header = u"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n"
        predictor_definition = u"def predict_%s" % function_name
        # arguments after the first one are aligned with the first one
        depth = len(predictor_definition) + 1
        predictor = u"%s(%s):\n" % (predictor_definition,
                                    (",\n" + " " * depth).join(args))
        predictor_doc = (INDENT + u"\"\"\" " + docstring + u"\n" + INDENT +
                         u"\"\"\"\n")
        body, term_analysis_predicates, item_analysis_predicates = \
            self.tree.plug_in_body(input_map=input_map,
                                   ids_path=ids_path,
                                   subtree=subtree)
        terms_body = ""
        if term_analysis_predicates or item_analysis_predicates:
            terms_body = self.term_analysis_body(term_analysis_predicates,
                                                 item_analysis_predicates)
        predictor = python_header + predictor + \
            predictor_doc + terms_body + body

        # wrapper function with the same arguments
        predictor_model = u"def predict"
        depth = len(predictor_model) + 1
        predictor += u"\n\n%s(%s):\n" % (predictor_model,
                                         (",\n" + " " * depth).join(args))
        predictor += u"%sprediction = predict_%s(%s)\n" % (
            INDENT, function_name, ", ".join(args_tree))

        if self.boosting is not None:
            predictor += u"%sprediction.update({\"weight\": %s})\n" % \
                (INDENT, self.boosting.get("weight"))
            if self.boosting.get("objective_class") is not None:
                predictor += u"%sprediction.update({\"class\": \"%s\"})\n" % \
                    (INDENT, self.boosting.get("objective_class"))
        predictor += u"%sreturn prediction" % INDENT

        if not PY3:
            # when PY3 is false the text is UTF-8 encoded before writing
            predictor = predictor.encode("utf8")
        out.write(predictor)
        out.flush()
Esempio n. 12
0
    def python(self, out, docstring, ids_path=None, subtree=True):
        """Generates a python function that implements the model.

        Writes to `out` the source of a `predict_<name>` function, built
        from the code returned by `self.tree.plug_in_body`, and of a
        `predict` wrapper that calls it. For boosting models
        (`self.boosting` is not None) the wrapper also updates the
        prediction with the "weight" and, if there is an
        "objective_class", with the "class".

        `docstring` is the text used in the docstring of the generated
        `predict_<name>` function. `ids_path` and `subtree` are passed
        to `self.tree.plug_in_body` unchanged.

        The generated functions have one keyword argument per
        non-objective field, unless MAX_ARGS_LENGTH is positive and there
        are more fields than that: then a single `data` argument is used.

        Every field in `self.fields` gets a 'slug' entry as a side effect.

        """

        args = []
        args_tree = []
        parameters = sort_fields(self.fields)
        input_map = len(parameters) > MAX_ARGS_LENGTH and MAX_ARGS_LENGTH > 0
        reserved_keywords = PYTHON_KEYWORDS if not input_map else None
        prefix = "_" if not input_map else ""
        for key, _ in parameters:
            field_name_to_show = self.fields[key]['name'].strip()
            if field_name_to_show == "":
                # fields with a blank name are shown by their id
                field_name_to_show = key
            slug = slugify(field_name_to_show,
                           reserved_keywords=reserved_keywords, prefix=prefix)
            self.fields[key].update(slug=slug)
            if not input_map:
                if key != self.objective_id:
                    args.append("%s=None" % slug)
                    args_tree.append("%s=%s" % (slug, slug))
        if input_map:
            args.append("data={}")
            args_tree.append("data=data")

        # the function is named after the objective field, or after the
        # boosting objective field when boosting information is available
        if not self.boosting:
            function_name = self.fields[self.objective_id]['slug']
        else:
            function_name = \
                self.fields[self.boosting["objective_field"]]['slug']
        # an empty slug must reach the fallback below instead of failing
        # with IndexError, so the first character is not indexed directly
        if prefix == "_" and function_name.startswith(prefix):
            function_name = function_name[1:]
        if function_name == "":
            function_name = "field_" + self.objective_id
        python_header = u"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n"
        predictor_definition = u"def predict_%s" % function_name
        depth = len(predictor_definition) + 1
        predictor = u"%s(%s):\n" % (predictor_definition,
                                    (",\n" + " " * depth).join(args))
        predictor_doc = (INDENT + u"\"\"\" " + docstring +
                         u"\n" + INDENT + u"\"\"\"\n")
        body, term_analysis_predicates, item_analysis_predicates = \
            self.tree.plug_in_body(input_map=input_map,
                                   ids_path=ids_path,
                                   subtree=subtree)
        terms_body = ""
        if term_analysis_predicates or item_analysis_predicates:
            terms_body = self.term_analysis_body(term_analysis_predicates,
                                                 item_analysis_predicates)
        predictor = python_header + predictor + \
            predictor_doc + terms_body + body

        predictor_model = u"def predict"
        depth = len(predictor_model) + 1
        predictor += u"\n\n%s(%s):\n" % (predictor_model,
                                         (",\n" + " " * depth).join(args))
        predictor += u"%sprediction = predict_%s(%s)\n" % (
            INDENT, function_name, ", ".join(args_tree))

        if self.boosting is not None:
            predictor += u"%sprediction.update({\"weight\": %s})\n" % \
                (INDENT, self.boosting.get("weight"))
            if self.boosting.get("objective_class") is not None:
                predictor += u"%sprediction.update({\"class\": \"%s\"})\n" % \
                    (INDENT, self.boosting.get("objective_class"))
        predictor += u"%sreturn prediction" % INDENT

        if not PY3:
            # when PY3 is false the text is UTF-8 encoded before writing
            predictor = predictor.encode("utf8")
        out.write(predictor)
        out.flush()
Esempio n. 13
0
def tree_python(tree,
                offsets,
                fields,
                objective_id,
                boosting,
                out,
                docstring_str,
                input_map=False,
                ids_path=None,
                subtree=True):
    """Writes a python function that implements the model.

    The text written to `out` (through `utf8`) contains two functions:
    `predict_<name>`, whose body is produced by `boosted_plug_in_body`
    when `boosting` is truthy and by `plug_in_body` otherwise, and
    `predict`, which calls the former and, when `boosting` is not None,
    adds its "weight" (and its "objective_class" as "class", if any) to
    the prediction.

    `tree` and `offsets` are handed over to the body generator together
    with `fields`, `objective_id`, `ids_path` and `subtree`.
    `docstring_str` is the text placed in the docstring of the generated
    `predict_<name>` function.

    A single `data` argument is used in the generated functions when
    `input_map` is true or there are more than MAX_ARGS_LENGTH fields;
    otherwise there is one keyword argument per non-objective field.

    As a side effect, a 'slug' entry is stored in every field of `fields`.

    """
    args = []
    args_tree = []
    parameters = sort_fields(fields)
    if not input_map:
        input_map = len(parameters) > MAX_ARGS_LENGTH
    # keywords and prefix are only handed to slugify when the fields are
    # used as argument names
    reserved_keywords = keyword.kwlist if not input_map else None
    prefix = "_" if not input_map else ""
    for field in parameters:
        field_id = field[0]
        field_name_to_show = fields[field_id]['name'].strip()
        if field_name_to_show == "":
            field_name_to_show = field_id
        slug = slugify(field_name_to_show,
                       reserved_keywords=reserved_keywords,
                       prefix=prefix)
        fields[field_id].update(slug=slug)
        if not input_map and field_id != objective_id:
            args.append("%s=None" % slug)
            args_tree.append("%s=%s" % (slug, slug))
    if input_map:
        args.append("data={}")
        args_tree.append("data=data")

    function_name = fields[objective_id]['slug'] if \
        not boosting else fields[boosting["objective_field"]]['slug']
    # startswith() rather than [0]: indexing an empty slug would raise
    # IndexError and the empty-name fallback below could not be reached
    if prefix == "_" and function_name.startswith(prefix):
        function_name = function_name[1:]
    if function_name == "":
        function_name = "field_" + objective_id
    python_header = "# -*- coding: utf-8 -*-\n"
    predictor_definition = "def predict_%s" % function_name
    # arguments after the first one are aligned with the first one
    depth = len(predictor_definition) + 1
    predictor = "%s(%s):\n" % (predictor_definition,
                               (",\n" + " " * depth).join(args))

    predictor_doc = (INDENT + "\"\"\" " + docstring_str + "\n" + INDENT +
                     "\"\"\"\n")
    body_fn = boosted_plug_in_body if boosting else plug_in_body
    body, term_analysis_predicates, item_analysis_predicates = \
        body_fn(tree, offsets, fields, objective_id,
                fields[objective_id]["optype"] == NUMERIC,
                input_map=input_map,
                ids_path=ids_path, subtree=subtree)
    terms_body = ""
    if term_analysis_predicates or item_analysis_predicates:
        terms_body = term_analysis_body(fields, term_analysis_predicates,
                                        item_analysis_predicates)
    predictor = python_header + predictor + \
        predictor_doc + terms_body + body

    # wrapper function with the same arguments
    predictor_model = "def predict"
    depth = len(predictor_model) + 1
    predictor += "\n\n%s(%s):\n" % (predictor_model,
                                    (",\n" + " " * depth).join(args))
    predictor += "%sprediction = predict_%s(%s)\n" % (
        INDENT, function_name, ", ".join(args_tree))

    if boosting is not None:
        predictor += "%sprediction.update({\"weight\": %s})\n" % \
            (INDENT, boosting.get("weight"))
        if boosting.get("objective_class") is not None:
            predictor += "%sprediction.update({\"class\": \"%s\"})\n" % \
                (INDENT, boosting.get("objective_class"))
    predictor += "%sreturn prediction" % INDENT

    out.write(utf8(predictor))
    out.flush()