def tree_rules(tree, offsets, objective_id, fields, out, ids_path=None,
               subtree=True):
    """Prints out an IF-THEN rule version of the tree.

    """
    for field in sort_fields(fields):
        slug = slugify(fields[field[0]]['name'])
        fields[field[0]].update(slug=slug)
    out.write(utf8(generate_rules(tree, offsets, objective_id, fields,
                                  ids_path=ids_path, subtree=subtree)))
    out.flush()
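# Illustrative usage sketch (not part of the original module): assumes
# `tree`, `offsets`, `objective_id` and `fields` have already been pulled
# out of a local model, as the other generators in this file expect, and
# that `out` accepts text (utf8() is assumed to return str on Python 3).
def _example_tree_rules(tree, offsets, objective_id, fields):
    from io import StringIO
    buffer_ = StringIO()
    # Render the IF-THEN ruleset into an in-memory buffer instead of a file.
    tree_rules(tree, offsets, objective_id, fields, buffer_)
    return buffer_.getvalue()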
def dot(name):
    """Creates a dot-separated name

    """
    return slugify(name.replace(" ", "."))
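# Minimal sketch of the expected shape (an assumption: the exact output
# depends on slugify's normalization rules, which are defined elsewhere):
def _example_dot():
    return dot("First Name")  # expected to resemble "first.name"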
def python(self, out, docstring, ids_path=None, subtree=True):
    """Generates a python function that implements the model.

    """
    args = []
    args_tree = []
    parameters = sort_fields(self.fields)
    input_map = len(parameters) > MAX_ARGS_LENGTH and MAX_ARGS_LENGTH > 0
    reserved_keywords = PYTHON_KEYWORDS if not input_map else None
    prefix = "_" if not input_map else ""
    for field in parameters:
        field_name_to_show = self.fields[field[0]]['name'].strip()
        if field_name_to_show == "":
            field_name_to_show = field[0]
        slug = slugify(field_name_to_show,
                       reserved_keywords=reserved_keywords, prefix=prefix)
        self.fields[field[0]].update(slug=slug)
        if not input_map:
            if field[0] != self.objective_id:
                args.append("%s=None" % slug)
                args_tree.append("%s=%s" % (slug, slug))
    if input_map:
        args.append("data={}")
        args_tree.append("data=data")

    function_name = self.fields[self.objective_id]['slug'] if \
        not self.boosting else \
        self.fields[self.boosting["objective_field"]]['slug']
    if prefix == "_" and function_name[0] == prefix:
        function_name = function_name[1:]
    if function_name == "":
        function_name = "field_" + self.objective_id
    python_header = u"#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n"
    predictor_definition = u"def predict_%s" % function_name
    depth = len(predictor_definition) + 1
    predictor = u"%s(%s):\n" % (predictor_definition,
                                (",\n" + " " * depth).join(args))
    predictor_doc = (INDENT + u"\"\"\" " + docstring + u"\n" +
                     INDENT + u"\"\"\"\n")
    body, term_analysis_predicates, item_analysis_predicates = \
        self.tree.plug_in_body(input_map=input_map, ids_path=ids_path,
                               subtree=subtree)
    terms_body = ""
    if term_analysis_predicates or item_analysis_predicates:
        terms_body = self.term_analysis_body(term_analysis_predicates,
                                             item_analysis_predicates)
    predictor = python_header + predictor + \
        predictor_doc + terms_body + body

    predictor_model = u"def predict"
    depth = len(predictor_model) + 1
    predictor += u"\n\n%s(%s):\n" % (predictor_model,
                                     (",\n" + " " * depth).join(args))
    predictor += u"%sprediction = predict_%s(%s)\n" % (
        INDENT, function_name, ", ".join(args_tree))
    if self.boosting is not None:
        predictor += u"%sprediction.update({\"weight\": %s})\n" % (
            INDENT, self.boosting.get("weight"))
        if self.boosting.get("objective_class") is not None:
            predictor += u"%sprediction.update({\"class\": \"%s\"})\n" % (
                INDENT, self.boosting.get("objective_class"))
    predictor += u"%sreturn prediction" % INDENT
    if not PY3:
        predictor = predictor.encode("utf8")
    out.write(predictor)
    out.flush()
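# Illustrative usage sketch (assumption: `local_model` is the local model
# object this method is bound to; docstring() is the helper used by
# hadoop_python_mapper below):
def _example_python_export(local_model, path="predictor.py"):
    # Write a standalone predict_* module usable for offline predictions.
    with open(path, "w") as handler:
        local_model.python(handler, local_model.docstring())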
def hadoop_python_mapper(self, out=sys.stdout, ids_path=None,
                         subtree=True):
    """Generates a hadoop mapper header to make predictions in python

    """
    input_fields = [(value, key) for (key, value) in
                    sorted(self.inverted_fields.items(),
                           key=lambda x: x[1])]
    parameters = [value for (key, value) in input_fields
                  if key != self.tree.objective_id]
    args = []
    for field in input_fields:
        slug = slugify(self.tree.fields[field[0]]['name'])
        self.tree.fields[field[0]].update(slug=slug)
        if field[0] != self.tree.objective_id:
            args.append("\"" + self.tree.fields[field[0]]['slug'] + "\"")

    with open(HADOOP_CSV_TEMPLATE) as template_handler:
        output = template_handler.read() % u",".join(parameters)

    output += u"\n%sself.INPUT_FIELDS = [%s]\n" % \
        ((INDENT * 3), (",\n " + INDENT * 8).join(args))

    input_types = []
    prefixes = []
    suffixes = []
    count = 0
    fields = self.tree.fields
    for key in [field[0] for field in input_fields
                if field[0] != self.tree.objective_id]:
        input_type = ('None' if fields[key]['datatype'] not in PYTHON_CONV
                      else PYTHON_CONV[fields[key]['datatype']])
        input_types.append(input_type)
        if 'prefix' in fields[key]:
            prefixes.append("%s: %s" % (count,
                                        repr(fields[key]['prefix'])))
        if 'suffix' in fields[key]:
            suffixes.append("%s: %s" % (count,
                                        repr(fields[key]['suffix'])))
        count += 1
    static_content = "%sself.INPUT_TYPES = [" % (INDENT * 3)
    formatter = ",\n%s" % (" " * len(static_content))
    output += u"\n%s%s%s" % (static_content,
                             formatter.join(input_types), "]\n")
    static_content = "%sself.PREFIXES = {" % (INDENT * 3)
    formatter = ",\n%s" % (" " * len(static_content))
    output += u"\n%s%s%s" % (static_content,
                             formatter.join(prefixes), "}\n")
    static_content = "%sself.SUFFIXES = {" % (INDENT * 3)
    formatter = ",\n%s" % (" " * len(static_content))
    output += u"\n%s%s%s" % (static_content,
                             formatter.join(suffixes), "}\n")

    with open(HADOOP_NEXT_TEMPLATE) as template_handler:
        output += template_handler.read()

    out.write(output)
    out.flush()

    self.tree.python(out, self.docstring(), input_map=True,
                     ids_path=ids_path, subtree=subtree)
    output = \
u"""
csv = CSVInput()
for values in csv:
    if not isinstance(values, bool):
        print u'%%s\\t%%s' %% (repr(values), repr(predict_%s(values)))
\n\n
""" % fields[self.tree.objective_id]['slug']
    out.write(output)
    out.flush()
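# Illustrative usage sketch (assumption: `local_model` is the object this
# method is bound to): dump a self-contained Hadoop streaming mapper that
# embeds the generated predict_* function.
def _example_hadoop_mapper_export(local_model, path="mapper.py"):
    with open(path, "w") as handler:
        local_model.hadoop_python_mapper(out=handler)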
def tree_python(tree, offsets, fields, objective_id, boosting,
                out, docstring_str, input_map=False,
                ids_path=None, subtree=True):
    """Writes a python function that implements the model.

    """
    args = []
    args_tree = []
    parameters = sort_fields(fields)
    if not input_map:
        input_map = len(parameters) > MAX_ARGS_LENGTH
    reserved_keywords = keyword.kwlist if not input_map else None
    prefix = "_" if not input_map else ""
    for field in parameters:
        field_name_to_show = fields[field[0]]['name'].strip()
        if field_name_to_show == "":
            field_name_to_show = field[0]
        slug = slugify(field_name_to_show,
                       reserved_keywords=reserved_keywords, prefix=prefix)
        fields[field[0]].update(slug=slug)
        if not input_map:
            if field[0] != objective_id:
                args.append("%s=None" % slug)
                args_tree.append("%s=%s" % (slug, slug))
    if input_map:
        args.append("data={}")
        args_tree.append("data=data")

    function_name = fields[objective_id]['slug'] if \
        not boosting else fields[boosting["objective_field"]]['slug']
    if prefix == "_" and function_name[0] == prefix:
        function_name = function_name[1:]
    if function_name == "":
        function_name = "field_" + objective_id
    python_header = "# -*- coding: utf-8 -*-\n"
    predictor_definition = "def predict_%s" % function_name
    depth = len(predictor_definition) + 1
    predictor = "%s(%s):\n" % (predictor_definition,
                               (",\n" + " " * depth).join(args))
    predictor_doc = (INDENT + "\"\"\" " + docstring_str + "\n" +
                     INDENT + "\"\"\"\n")
    body_fn = boosted_plug_in_body if boosting else plug_in_body
    body, term_analysis_predicates, item_analysis_predicates = \
        body_fn(tree, offsets, fields, objective_id,
                fields[objective_id]["optype"] == NUMERIC,
                input_map=input_map, ids_path=ids_path, subtree=subtree)
    terms_body = ""
    if term_analysis_predicates or item_analysis_predicates:
        terms_body = term_analysis_body(fields, term_analysis_predicates,
                                        item_analysis_predicates)
    predictor = python_header + predictor + \
        predictor_doc + terms_body + body

    predictor_model = "def predict"
    depth = len(predictor_model) + 1
    predictor += "\n\n%s(%s):\n" % (predictor_model,
                                    (",\n" + " " * depth).join(args))
    predictor += "%sprediction = predict_%s(%s)\n" % (
        INDENT, function_name, ", ".join(args_tree))
    if boosting is not None:
        predictor += "%sprediction.update({\"weight\": %s})\n" % (
            INDENT, boosting.get("weight"))
        if boosting.get("objective_class") is not None:
            predictor += "%sprediction.update({\"class\": \"%s\"})\n" % (
                INDENT, boosting.get("objective_class"))
    predictor += "%sreturn prediction" % INDENT
    out.write(utf8(predictor))
    out.flush()
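# Illustrative usage sketch (not part of the original module): mirrors
# tree_python's signature; `boosting` may be None for plain trees, and
# `out` is assumed to accept the text that utf8() produces.
def _example_tree_python(tree, offsets, fields, objective_id,
                         boosting=None):
    from io import StringIO
    buffer_ = StringIO()
    tree_python(tree, offsets, fields, objective_id, boosting, buffer_,
                "Predictor generated from a local tree.")
    return buffer_.getvalue()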