def plug_in(self, out=sys.stdout, hadoop=False, filter_id=None, subtree=True):
    """Generates a basic javascript implementation of local predictions

    `out` is file descriptor to write the javascript code.

    (`hadoop`, `filter_id` and `subtree` are unused here; kept for
    interface compatibility.)
    """
    # Give the objective field a JS-safe CamelCase alias (used in the
    # generated function name) and every field a camelCase alias,
    # honoring the JS_KEYWORDS restrictions.
    objective_field = self.tree.fields[self.tree.objective_id]
    objective_field['CamelCase'] = to_camel_js(
        unidecode(objective_field['name']), False)
    for field_id, _ in sort_fields(self.tree.fields):
        field_info = self.tree.fields[field_id]
        field_info['camelCase'] = to_camel_js(unidecode(field_info['name']))

    body, term_predicates, item_predicates = self.tree.plug_in_body()
    terms_section = self.js_term_analysis_body(term_predicates) \
        if term_predicates else ""
    items_section = self.js_item_analysis_body(item_predicates) \
        if item_predicates else ""

    # Assemble: preamble + analysis helpers + tree body + default return.
    code = self.js_pre_body()
    code += terms_section + items_section + body
    code += u"%sreturn null;\n}\n" % INDENT
    if not PY3:
        code = code.encode("utf8")
    out.write(code)
    out.flush()
def js_signature(self, input_map=False):
    """Returns the javascript signature for a prediction method.

    When `input_map` is set (or the model has more fields than
    MAX_ARGS_LENGTH) the generated function takes a single `data`
    argument; otherwise it takes one argument per input field.
    """
    objective_field = self.tree.fields[self.tree.objective_id]
    # Cache the JS-safe CamelCase name used to build the function name.
    if 'CamelCase' not in objective_field:
        objective_field['CamelCase'] = to_camel_js(
            unidecode(objective_field['name']), False)
    output = u"function predict%s(" % objective_field['CamelCase']
    args = []
    if len(self.tree.fields) > MAX_ARGS_LENGTH or input_map:
        # Too many fields to enumerate as parameters: use one map argument.
        args.append("data")
    else:
        for field in sort_fields(self.tree.fields):
            field_obj = self.tree.fields[field[0]]
            if 'camelCase' not in field_obj:
                field_obj['camelCase'] = to_camel_js(
                    unidecode(field_obj['name']))
            # The objective field is predicted, not passed as input.
            if field[0] != self.tree.objective_id:
                args.append(u"%s" % field_obj['camelCase'])
    output += u", ".join(args) + u")"
    return output
def list_fields(model, out=sys.stdout):
    """Prints descriptions of the fields for this model.
    """
    # The objective field goes first, in angle brackets.
    objective = model.fields[model.objective_id]
    out.write(utf8('<%-32s : %s>\n' % (objective['name'],
                                       objective['optype'])))
    out.flush()

    # Then every input field, one per line, in square brackets.
    for key, val in sort_fields(model.fields):
        if key == model.objective_id:
            continue
        out.write(utf8('[%-32s : %s]\n' % (val['name'], val['optype'])))
        out.flush()
    return model.fields
def plug_in(self, out=sys.stdout, filter_id=None, subtree=True):
    """Writes an R function that implements the model.

    (`filter_id` and `subtree` are unused here; kept for interface
    compatibility.)
    """
    # CamelCase alias for the generated predictor's name; each field also
    # gets a dotted alias honoring the R_KEYWORDS restrictions.
    objective_field = self.tree.fields[self.tree.objective_id]
    camelcase = to_camel_js(objective_field['name'], False)
    objective_field['CamelCase'] = camelcase

    # Every field (objective included) becomes an argument defaulting
    # to NA.
    args = []
    for field_id, _ in sort_fields(self.tree.fields):
        field_info = self.tree.fields[field_id]
        field_info['dotted'] = dot(field_info['name'])
        args.append("%s=%s" % (field_info['dotted'], "NA"))

    body, term_predicates, item_predicates = self.tree.plug_in_body()
    terms_section = self.r_term_analysis_body(term_predicates) \
        if term_predicates else ""
    items_section = self.r_item_analysis_body(item_predicates) \
        if item_predicates else ""

    # Function header: one argument per line, aligned with the opening
    # parenthesis.
    predictor_definition = u"predict%s <- function" % camelcase
    depth = len(predictor_definition) + 1
    predictor = u"%s(%s){\n" % (predictor_definition,
                                (",\n" + " " * depth).join(args))
    # The model docstring is rendered as leading R comment lines.
    docstring = "\n#".join(self.docstring().split("\n"))
    predictor_doc = u"# " + docstring + u"\n" + u"#\n"

    output = predictor_doc + predictor
    output += terms_section + items_section + body
    output += u"%sreturn(NA)\n}\n" % INDENT
    if not PY3:
        output = output.encode("utf8")
    out.write(output)
    out.flush()
def tree_rules(tree, offsets, objective_id, fields, out, ids_path=None,
               subtree=True):
    """Prints out an IF-THEN rule version of the tree.
    """
    # Attach a slug to every field; generate_rules uses it to name them.
    for field_id, _ in sort_fields(fields):
        fields[field_id].update(slug=slugify(fields[field_id]['name']))
    rules = generate_rules(tree, offsets, objective_id, fields,
                           ids_path=ids_path, subtree=subtree)
    out.write(utf8(rules))
    out.flush()
def mysql(self, out, ids_path=None, subtree=True, attr=None):
    """Writes a MySQL function that implements the model.
    """
    header = "CREATE FUNCTION predict_%s (%s)" \
             "\nRETURNS %s DETERMINISTIC\nRETURN "

    def sql_type(field_info):
        # MySQL type mirroring the field's optype.
        return 'NUMERIC' if field_info['optype'] == 'numeric' else \
            'VARCHAR(250)'

    # One argument per input field; blank names fall back to the field id.
    args = []
    for field_id, field in sort_fields(self.fields):
        if field_id == self.objective_id:
            continue
        arg_name = self.fields[field_id]['name'].strip() or field_id
        args.append("`%s` %s" % (arg_name, sql_type(field)))

    objective = self.fields[self.objective_id]
    function_name = objective['name'] or "field_" + self.objective_id
    return_type = sql_type(objective)
    # when the output is a confidence metric (error/confidence)
    if attr is not None:
        function_name += "_%s" % attr
        return_type = 'NUMERIC'

    out.write(header % (function_name, ", ".join(args), return_type))
    body = self.tree.plug_in_body(ids_path=ids_path, subtree=subtree,
                                  attr=attr)
    out.write(body)
    out.flush()
    return len(body)
def python(self, out, docstring, ids_path=None, subtree=True):
    """Generates a python function that implements the model.

    Writes a `predict_<objective>` function implementing the tree plus a
    `predict` wrapper that forwards to it (adding boosting metadata when
    the model is boosted).

    `out` is the file descriptor to write the python code to
    `docstring` becomes the docstring of the generated predictor
    `ids_path` and `subtree` are forwarded to the tree body generator
    """
    args = []
    args_tree = []
    parameters = sort_fields(self.fields)
    # With too many fields the generated function takes a single `data`
    # dict instead of one keyword argument per field.
    input_map = len(parameters) > MAX_ARGS_LENGTH and MAX_ARGS_LENGTH > 0
    reserved_keywords = PYTHON_KEYWORDS if not input_map else None
    prefix = "_" if not input_map else ""
    for field in parameters:
        field_name_to_show = self.fields[field[0]]['name'].strip()
        if field_name_to_show == "":
            field_name_to_show = field[0]
        slug = slugify(field_name_to_show,
                       reserved_keywords=reserved_keywords, prefix=prefix)
        self.fields[field[0]].update(slug=slug)
        # The objective field is predicted, not passed as input.
        if not input_map and field[0] != self.objective_id:
            args.append("%s=None" % (slug))
            args_tree.append("%s=%s" % (slug, slug))
    if input_map:
        args.append("data={}")
        args_tree.append("data=data")
    # Boosted models predict their own objective field.
    function_name = self.fields[self.objective_id]['slug'] if \
        not self.boosting else \
        self.fields[self.boosting["objective_field"]]['slug']
    # startswith instead of indexing: safe even when the slug is empty.
    if prefix == "_" and function_name.startswith(prefix):
        function_name = function_name[1:]
    if function_name == "":
        function_name = "field_" + self.objective_id
    python_header = u"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n"
    predictor_definition = (u"def predict_%s" % function_name)
    depth = len(predictor_definition) + 1
    # Arguments laid out one per line, aligned with the opening paren.
    predictor = u"%s(%s):\n" % (predictor_definition,
                                (",\n" + " " * depth).join(args))
    predictor_doc = (INDENT + u"\"\"\" " + docstring + u"\n" +
                     INDENT + u"\"\"\"\n")
    body, term_analysis_predicates, item_analysis_predicates = \
        self.tree.plug_in_body(input_map=input_map, ids_path=ids_path,
                               subtree=subtree)
    terms_body = ""
    if term_analysis_predicates or item_analysis_predicates:
        terms_body = self.term_analysis_body(term_analysis_predicates,
                                             item_analysis_predicates)
    predictor = python_header + predictor + \
        predictor_doc + terms_body + body
    # `predict` wrapper forwarding every argument to the tree predictor.
    predictor_model = u"def predict"
    depth = len(predictor_model) + 1
    predictor += u"\n\n%s(%s):\n" % (predictor_model,
                                     (",\n" + " " * depth).join(args))
    predictor += u"%sprediction = predict_%s(%s)\n" % (
        INDENT, function_name, ", ".join(args_tree))
    if self.boosting is not None:
        # Boosted trees also report their weight and, when classifying,
        # the class they vote for.
        predictor += u"%sprediction.update({\"weight\": %s})\n" % \
            (INDENT, self.boosting.get("weight"))
        if self.boosting.get("objective_class") is not None:
            predictor += u"%sprediction.update({\"class\": \"%s\"})\n" % \
                (INDENT, self.boosting.get("objective_class"))
    predictor += u"%sreturn prediction" % INDENT
    if not PY3:
        predictor = predictor.encode("utf8")
    out.write(predictor)
    out.flush()
def tree_python(tree, offsets, fields, objective_id, boosting, out,
                docstring_str, input_map=False, ids_path=None, subtree=True):
    """Writes a python function that implements the model.

    Emits a `predict_<objective>` function implementing the tree plus a
    `predict` wrapper that forwards to it (adding boosting metadata when
    `boosting` is given).

    `out` is the file descriptor to write the python code to
    `docstring_str` becomes the docstring of the generated predictor
    `input_map` forces a single `data` dict argument
    `ids_path` and `subtree` are forwarded to the body generator
    """
    args = []
    args_tree = []
    parameters = sort_fields(fields)
    if not input_map:
        # Fall back to a single `data` dict when there are too many fields.
        input_map = len(parameters) > MAX_ARGS_LENGTH
    reserved_keywords = keyword.kwlist if not input_map else None
    prefix = "_" if not input_map else ""
    for field in parameters:
        field_name_to_show = fields[field[0]]['name'].strip()
        if field_name_to_show == "":
            field_name_to_show = field[0]
        slug = slugify(field_name_to_show,
                       reserved_keywords=reserved_keywords, prefix=prefix)
        fields[field[0]].update(slug=slug)
        # The objective field is predicted, not passed as input.
        if not input_map and field[0] != objective_id:
            args.append("%s=None" % (slug))
            args_tree.append("%s=%s" % (slug, slug))
    if input_map:
        args.append("data={}")
        args_tree.append("data=data")
    # Boosted models predict their own objective field.
    function_name = fields[objective_id]['slug'] if \
        not boosting else fields[boosting["objective_field"]]['slug']
    # startswith instead of indexing: safe even when the slug is empty.
    if prefix == "_" and function_name.startswith(prefix):
        function_name = function_name[1:]
    if function_name == "":
        function_name = "field_" + objective_id
    python_header = "# -*- coding: utf-8 -*-\n"
    predictor_definition = ("def predict_%s" % function_name)
    depth = len(predictor_definition) + 1
    # Arguments laid out one per line, aligned with the opening paren.
    predictor = "%s(%s):\n" % (predictor_definition,
                               (",\n" + " " * depth).join(args))
    predictor_doc = (INDENT + "\"\"\" " + docstring_str + "\n" +
                     INDENT + "\"\"\"\n")
    # Boosted trees use a dedicated body generator.
    body_fn = boosted_plug_in_body if boosting else plug_in_body
    body, term_analysis_predicates, item_analysis_predicates = \
        body_fn(tree, offsets, fields, objective_id,
                fields[objective_id]["optype"] == NUMERIC,
                input_map=input_map, ids_path=ids_path, subtree=subtree)
    terms_body = ""
    if term_analysis_predicates or item_analysis_predicates:
        terms_body = term_analysis_body(fields, term_analysis_predicates,
                                        item_analysis_predicates)
    predictor = python_header + predictor + \
        predictor_doc + terms_body + body
    # `predict` wrapper forwarding every argument to the tree predictor.
    predictor_model = "def predict"
    depth = len(predictor_model) + 1
    predictor += "\n\n%s(%s):\n" % (predictor_model,
                                    (",\n" + " " * depth).join(args))
    predictor += "%sprediction = predict_%s(%s)\n" % (
        INDENT, function_name, ", ".join(args_tree))
    if boosting is not None:
        # Boosted trees also report their weight and, when classifying,
        # the class they vote for.
        predictor += "%sprediction.update({\"weight\": %s})\n" % \
            (INDENT, boosting.get("weight"))
        if boosting.get("objective_class") is not None:
            predictor += "%sprediction.update({\"class\": \"%s\"})\n" % \
                (INDENT, boosting.get("objective_class"))
    predictor += "%sreturn prediction" % INDENT
    out.write(utf8(predictor))
    out.flush()