def plug_in_body(self, body=u"", conditions=None, cmv=None,
                 ids_path=None, subtree=True, attr=None):
    """Translate the model into a chain of "if" statements in Tableau syntax.

    The node's children (as selected by `filter_nodes`) are visited
    recursively; every leaf appends one
    ``IF|ELSEIF <conditions joined by AND> THEN <value>`` line to `body`.

    Args:
        body: code generated so far. When empty, the next emitted clause
            opens with ``IF``; otherwise it opens with ``ELSEIF``.
        conditions: list of condition strings gathered on the path from
            the root. Defaults to an empty list.
        cmv: list of field names. A split field whose name is listed here
            does not get a separate missing-value clause in this node.
        ids_path: forwarded to `filter_nodes` as `ids`.
        subtree: forwarded to `filter_nodes`.
        attr: when None the leaf value is the node output formatted by
            `value_to_print`; otherwise it is `getattr(self, attr)`.

    Returns:
        The `body` string extended with the clauses for this subtree.
    """
    if cmv is None:
        cmv = []
    # Default `conditions` regardless of `body`: the leaf branch joins it
    # unconditionally, so leaving it as None when `body` was non-empty
    # raised a TypeError.
    if conditions is None:
        conditions = []
    alternate = u"ELSEIF" if body else u"IF"

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: emit the clause that returns this node's value.
        if attr is None:
            value = value_to_print(
                self.output, self.fields[self.objective_id]['optype'])
        else:
            value = getattr(self, attr)
        body += u"%s %s THEN" % (alternate, " AND ".join(conditions))
        body += u" %s\n" % value
        return body

    # field used in the split
    field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    one_branch = (not has_missing_branch or
                  self.fields[field]['optype'] in COMPOSED_FIELDS)
    if one_branch and self.fields[field]['name'] not in cmv:
        body += self.missing_check_code(field, alternate, cmv,
                                        conditions, attr=attr)

    for child in children:
        pre_condition = u""
        post_condition = u""
        if has_missing_branch and child.predicate.value is not None:
            pre_condition = self.missing_prefix_code(child, field, cmv)
            post_condition = u")"
        # The return value is ignored and the last entry of `conditions`
        # is removed after the recursion, so this call is presumably
        # expected to append the child's condition to `conditions` --
        # confirm against split_condition_code.
        child.split_condition_code(field, conditions,
                                   pre_condition, post_condition)
        # Children receive copies so that siblings do not see each
        # other's conditions or missing-value bookkeeping.
        body = child.plug_in_body(body, conditions[:], cmv=cmv[:],
                                  ids_path=ids_path, subtree=subtree,
                                  attr=attr)
        del conditions[-1]
    return body
def plug_in_body(self, depth=1, cmv=None, input_map=False,
                 ids_path=None, subtree=True):
    """Translate the model into a set of "if" python statements.

    `depth` controls the size of indentation. `cmv` is the list of field
    slugs for which no separate missing-value check is emitted in this
    node. `input_map`, `ids_path` and `subtree` are forwarded to the
    helpers and to the recursive calls.

    Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`
    where `body` is the generated code for this subtree and the two lists
    collect the entries gathered while generating the split conditions.
    """
    if cmv is None:
        cmv = []
    term_analysis_fields = []
    item_analysis_fields = []

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: return a dict with the prediction and, when the node
        # has one, its probability.
        value = value_to_print(self.output, "numeric")
        body = u"%sreturn {\"prediction\":%s" % (INDENT * depth, value)
        if hasattr(self, "probability"):
            body += u", \"probability\": %s" % self.probability
        body += u"}\n"
        return body, term_analysis_fields, item_analysis_fields

    body = u""
    # field used in the split
    split_field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    one_branch = (not has_missing_branch or
                  self.fields[split_field]['optype'] in COMPOSED_FIELDS)
    if one_branch and self.fields[split_field]['slug'] not in cmv:
        body += self.missing_check_code(split_field, depth, input_map, cmv)

    for child in children:
        child_field = child.predicate.field
        # code when missing_splits has been used
        if has_missing_branch and child.predicate.value is not None:
            pre_condition = self.missing_prefix_code(
                child, child_field, input_map, cmv)
        else:
            pre_condition = u""

        # complete split condition code
        body += child.split_condition_code(
            child_field, depth, input_map, pre_condition,
            term_analysis_fields, item_analysis_fields)

        # value to be determined in next node; the child gets its own
        # copy of `cmv`
        child_body, child_terms, child_items = child.plug_in_body(
            depth + 1, cmv=cmv[:], input_map=input_map,
            ids_path=ids_path, subtree=subtree)
        body += child_body
        term_analysis_fields.extend(child_terms)
        item_analysis_fields.extend(child_items)

    return body, term_analysis_fields, item_analysis_fields
def plug_in_body(self, ids_path=None, subtree=True, prefix=None, metric=CONFIDENCE):
    """Translate the model into a set of functions, one per node, that
    contain only if statements and function calls.

    For every node selected by `old_filter_nodes` a function named
    `<prefix>n_<node.id>(data)` is generated. For a node with children the
    function body delegates to the children's functions; for a node
    without children it returns a dict with the prediction and the
    requested metric.

    `prefix` is prepended to every generated function name (defaults to
    the empty string). `metric` names the node attribute reported next to
    the prediction; any value not in CONFIDENCE_METRICS falls back to
    "confidence". `ids_path` and `subtree` are forwarded to
    `old_filter_nodes`.

    Returns a tuple `(functions, term_analysis_fields,
    item_analysis_fields)` where `functions` is a list with the source
    text of each generated function.
    """
    # NOTE(review): the block nesting below (in particular which
    # statements belong to `if condition:`) was reconstructed from the
    # statement order and the depth arithmetic -- confirm against the
    # generated output.
    # label for the confidence measure and initialization
    metric = metric if metric in CONFIDENCE_METRICS else "confidence"
    if prefix is None:
        prefix = ""
    term_analysis_fields = []
    item_analysis_fields = []
    functions = []
    nodes = old_filter_nodes(list(self.nodes.values()), ids=ids_path, subtree=subtree)
    if nodes:
        for node in nodes:
            # every generated function starts at indentation level 1
            depth = 1
            body = "%sdef %sn_%s(data):\n" % (INDENT * depth, prefix, node.id)
            depth += 1
            # node.children holds keys into self.nodes
            children = [self.nodes[key] for key in node.children]
            if children:
                # field used in the split
                field = children[0].predicate.field
                has_missing_branch = (missing_branch(children) or none_value(children))
                # the missing is singled out as a special case only when
                # there's no missing branch in the children list
                has_one_branch = not has_missing_branch or \
                    self.fields[field]['optype'] in COMPOSED_FIELDS
                if has_one_branch:
                    body += self.missing_check_code( \
                        field, node, depth, metric)
                condition = True
                for child in children:
                    if condition: # only first child has if condition
                        field = child.predicate.field
                        pre_condition = ""
                        # code when missing_splits has been used
                        if has_missing_branch and child.predicate.value \
                                is not None:
                            pre_condition = child.missing_prefix_code( \
                                field)
                        # complete split condition code
                        body += child.split_condition_code( \
                            field, depth, pre_condition, term_analysis_fields, item_analysis_fields)
                    # Delegate to the child's function. The call is
                    # written one level deeper than the current depth;
                    # the net change of -1 per child makes the next
                    # child's call land one level shallower than this one.
                    depth += 1
                    body += "%sreturn %sn_%s(data)\n" % \
                        (INDENT * depth, prefix, child.id)
                    depth -= 2
                    condition = False
            else:
                # node without children: return the prediction together
                # with the selected metric read from the node
                value = value_to_print( \
                    node.output, self.fields[self.objective_id]['optype'])
                body += "%sreturn {\"prediction\":%s, \"%s\":%s}\n" % ( \
                    INDENT * depth, value, metric, getattr(node, metric))
                # depth is reset at the top of the loop, so this has no
                # effect on the next node
                depth -= 1
            functions.append(body)
    return functions, term_analysis_fields, item_analysis_fields
def plug_in_body(self, depth=0, cmv=None, ids_path=None, subtree=True,
                 body=u"", attr=None):
    """Translate the model into the body of a MySQL function.

    The subtree is rendered as nested `IF (` expressions. `depth`
    controls the indentation of each nested `IF (`; `body` is the code
    generated so far. `cmv` is the list of field names for which no
    separate missing-value check is emitted in this node.

    `attr` decides the value emitted by the leaves. When it's None, the
    prediction is used; when set to the name of an attribute
    (e.g. 'confidence') that attribute of the node is used instead.

    Returns the extended `body` string.
    """

    def nested_if(level):
        """Return the text that opens an `IF (` at the given depth."""
        return u",\n%sIF (" % (level * INDENT)

    if cmv is None:
        cmv = []
    alternate = nested_if(depth) if body else u"IF ("

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: append the value selected by `attr`.
        if attr is None:
            value = value_to_print(
                self.output, self.fields[self.objective_id]['optype'])
        else:
            value = getattr(self, attr)
        body += u", %s" % (value)
        return body

    # field used in the split
    field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # text appended once all the children have been rendered
    closing = u", NULL))"
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    if not has_missing_branch and self.fields[field]['name'] not in cmv:
        body += self.missing_check_code(field, alternate, cmv, attr)
        depth += 1
        alternate = nested_if(depth)
        # the missing check adds one more parenthesis to close
        closing += u")"

    for child in children:
        # code when missing splits has been used
        if has_missing_branch and child.predicate.value is not None:
            pre_condition = self.missing_prefix_code(child, field, cmv)
        else:
            pre_condition = u""

        # complete split condition code
        body += child.split_condition_code(field, alternate, pre_condition)

        depth += 1
        alternate = nested_if(depth)
        body = child.plug_in_body(depth, cmv=cmv[:], ids_path=ids_path,
                                  subtree=subtree, body=body, attr=attr)

    return body + closing
def tableau_body(self, body=u"", conditions=None, cmv=None,
                 ids_path=None, subtree=True):
    """Translate the model into a chain of "if" statements in Tableau syntax.

    Every leaf appends one
    ``IF|ELSEIF <conditions joined by AND> THEN <value>`` line to `body`.
    When the split field has no missing branch among the children, an
    extra ``ISNULL([field])`` clause returning this node's own output is
    emitted first.

    Args:
        body: code generated so far. When empty, the next emitted clause
            opens with ``IF``; otherwise it opens with ``ELSEIF``.
        conditions: list of condition strings gathered on the path from
            the root. Defaults to an empty list.
        cmv: list of field names already covered by a missing-value
            clause; fields listed here get no new ``ISNULL`` clause.
        ids_path: forwarded to `filter_nodes` as `ids`.
        subtree: forwarded to `filter_nodes`.

    Returns:
        The extended `body` string, or ``u""`` as soon as a child splits
        on a field whose optype is 'text' or 'items' with a non-None
        predicate value.
    """
    if cmv is None:
        cmv = []
    # Default `conditions` regardless of `body`: it is joined
    # unconditionally below, so leaving it as None when `body` was
    # non-empty raised a TypeError.
    if conditions is None:
        conditions = []
    alternate = u"ELSEIF" if body else u"IF"

    def output_value():
        """Return this node's output: verbatim for numeric objectives,
        passed through `tableau_string` otherwise."""
        if self.fields[self.objective_id]['optype'] == 'numeric':
            return self.output
        return tableau_string(self.output)

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: emit the clause that returns this node's output.
        value = output_value()
        body += u"%s %s THEN" % (alternate, " AND ".join(conditions))
        body += u" %s\n" % value
        return body

    field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    if not has_missing_branch and self.fields[field]['name'] not in cmv:
        field_name = self.fields[field]['name']
        missing_conditions = conditions + ["ISNULL([%s])" % field_name]
        body += u"%s %s THEN " % (alternate,
                                  " AND ".join(missing_conditions))
        body += u"%s\n" % output_value()
        cmv.append(field_name)

    for child in children:
        predicate = child.predicate
        pre_condition = u""
        post_condition = u""
        if has_missing_branch and predicate.value is not None:
            # Wrap the comparison with an explicit null test on the
            # split field.
            negation = u"" if predicate.missing else u"NOT "
            connection = u"OR" if predicate.missing else u"AND"
            pre_condition = u"(%sISNULL([%s]) %s " % (
                negation, self.fields[field]['name'], connection)
            if not predicate.missing:
                cmv.append(self.fields[field]['name'])
            post_condition = u")"

        optype = self.fields[predicate.field]['optype']
        if predicate.value is None:
            # Split on missing values: the condition is built from the
            # T_MISSING_OPERATOR entry alone, with no operator or value.
            value = ""
            operator = u""
            pre_condition = T_MISSING_OPERATOR[predicate.operator]
            post_condition = u")"
        elif optype == 'text' or optype == 'items':
            # These splits are not translated: the whole body is
            # discarded.
            return u""
        else:
            if optype == 'numeric':
                value = predicate.value
            else:
                value = repr(predicate.value)
            operator = PYTHON_OPERATOR[predicate.operator]

        conditions.append("%s[%s]%s%s%s" % (
            pre_condition,
            self.fields[predicate.field]['name'],
            operator,
            value,
            post_condition))
        # Children receive copies so that siblings do not see each
        # other's conditions or missing-value bookkeeping.
        body = child.tableau_body(body, conditions[:], cmv=cmv[:],
                                  ids_path=ids_path, subtree=subtree)
        del conditions[-1]
    return body
def python_body(self, depth=1, cmv=None, input_map=False,
                ids_path=None, subtree=True):
    """Translate the model into a set of "if" python statements.

    `depth` controls the size of indentation. `cmv` is the list of field
    slugs already covered by a missing-value check. When `input_map` is
    true the generated conditions read the inputs from a `data` map
    instead of using the slug as a variable name.

    Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`:
    the generated code plus the `(field, term)` pairs found in splits on
    'text' and 'items' fields respectively.
    """

    def map_data(field, missing=False):
        """Returns the subject of the condition in map format when
        `input_map` is set, and the bare field otherwise.
        """
        if not input_map:
            return field
        template = "data.get('%s')" if missing else "data['%s']"
        return template % field

    def node_value():
        """Returns the node output: verbatim for numeric objectives,
        its repr otherwise.
        """
        if self.fields[self.objective_id]['optype'] == 'numeric':
            return self.output
        return repr(self.output)

    if cmv is None:
        cmv = []
    term_analysis_fields = []
    item_analysis_fields = []

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: return the prediction.
        body = u"%sreturn %s\n" % (INDENT * depth, node_value())
        return body, term_analysis_fields, item_analysis_fields

    body = u""
    field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    if (not has_missing_branch
            and self.fields[field]["optype"] not in ["text", "items"]
            and self.fields[field]['slug'] not in cmv):
        body += u"%sif (%s is None):\n" % (
            INDENT * depth, map_data(self.fields[field]['slug'], True))
        body += u"%sreturn %s\n" % (INDENT * (depth + 1), node_value())
        cmv.append(self.fields[field]['slug'])

    # optype -> (list collecting the pairs, name of the matching function)
    matchers = {
        'text': (term_analysis_fields, "term_matches"),
        'items': (item_analysis_fields, "item_matches"),
    }

    for child in children:
        predicate = child.predicate
        field = predicate.field
        info = self.fields[field]

        pre_condition = u""
        if has_missing_branch and predicate.value is not None:
            # Prefix the comparison with an explicit None test.
            if predicate.missing:
                negation, connection = u"", u"or"
            else:
                negation, connection = u" not", u"and"
            pre_condition = u"%s is%s None %s " % (
                map_data(info['slug'], True), negation, connection)
            if not predicate.missing:
                cmv.append(info['slug'])

        optype = info['optype']
        if optype in ('numeric', 'text', 'items') or predicate.value is None:
            value = predicate.value
        else:
            value = repr(predicate.value)

        if optype in matchers:
            collected, matching_function = matchers[optype]
            collected.append((field, predicate.term))
            # NOTE(review): `unicode` is the Python 2 builtin; under
            # Python 3 it has to be defined elsewhere in the module.
            prefix = 'u' if isinstance(predicate.term, unicode) else ''
            body += (
                u"%sif (%s%s(%s, \"%s\", %s\"%s\") %s %s):"
                u"\n" %
                (INDENT * depth,
                 pre_condition,
                 matching_function,
                 map_data(info['slug'], False),
                 info['slug'],
                 prefix,
                 predicate.term.replace("\"", "\\\""),
                 PYTHON_OPERATOR[predicate.operator],
                 value))
        else:
            if predicate.value is None:
                operator = MISSING_OPERATOR[predicate.operator]
                cmv.append(info['slug'])
            else:
                operator = PYTHON_OPERATOR[predicate.operator]
            body += u"%sif (%s%s %s %s):\n" % (
                INDENT * depth,
                pre_condition,
                map_data(info['slug'], False),
                operator,
                value)

        # The child gets its own copy of `cmv`.
        child_body, child_terms, child_items = child.python_body(
            depth + 1, cmv=cmv[:], input_map=input_map,
            ids_path=ids_path, subtree=subtree)
        body += child_body
        term_analysis_fields.extend(child_terms)
        item_analysis_fields.extend(child_items)

    return body, term_analysis_fields, item_analysis_fields
def tableau_body(self, body=u"", conditions=None, cmv=None,
                 ids_path=None, subtree=True):
    """Translate the model into a chain of "if" statements in Tableau syntax.

    Every leaf appends one
    ``IF|ELSEIF <conditions joined by AND> THEN <value>`` line to `body`.
    When the split field has no missing branch among the children, an
    extra ``ISNULL([field])`` clause returning this node's own output is
    emitted first.

    Args:
        body: code generated so far. When empty, the next emitted clause
            opens with ``IF``; otherwise it opens with ``ELSEIF``.
        conditions: list of condition strings gathered on the path from
            the root. Defaults to an empty list.
        cmv: list of field names already covered by a missing-value
            clause; fields listed here get no new ``ISNULL`` clause.
        ids_path: forwarded to `filter_nodes` as `ids`.
        subtree: forwarded to `filter_nodes`.

    Returns:
        The extended `body` string, or ``u""`` as soon as a child splits
        on a field whose optype is 'text' or 'items' with a non-None
        predicate value.
    """
    if cmv is None:
        cmv = []
    # Default `conditions` regardless of `body`: it is joined
    # unconditionally below, so leaving it as None when `body` was
    # non-empty raised a TypeError.
    if conditions is None:
        conditions = []
    alternate = u"ELSEIF" if body else u"IF"

    def output_value():
        """Return this node's output: verbatim for numeric objectives,
        passed through `tableau_string` otherwise."""
        if self.fields[self.objective_id]['optype'] == 'numeric':
            return self.output
        return tableau_string(self.output)

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: emit the clause that returns this node's output.
        value = output_value()
        body += u"%s %s THEN" % (alternate, " AND ".join(conditions))
        body += u" %s\n" % value
        return body

    field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    if not has_missing_branch and self.fields[field]['name'] not in cmv:
        field_name = self.fields[field]['name']
        missing_conditions = conditions + ["ISNULL([%s])" % field_name]
        body += u"%s %s THEN " % (alternate,
                                  " AND ".join(missing_conditions))
        body += u"%s\n" % output_value()
        cmv.append(field_name)

    for child in children:
        predicate = child.predicate
        pre_condition = u""
        post_condition = u""
        if has_missing_branch and predicate.value is not None:
            # Wrap the comparison with an explicit null test on the
            # split field.
            negation = u"" if predicate.missing else u"NOT "
            connection = u"OR" if predicate.missing else u"AND"
            pre_condition = u"(%sISNULL([%s]) %s " % (
                negation, self.fields[field]['name'], connection)
            if not predicate.missing:
                cmv.append(self.fields[field]['name'])
            post_condition = u")"

        optype = self.fields[predicate.field]['optype']
        if predicate.value is None:
            # Split on missing values: the condition is built from the
            # T_MISSING_OPERATOR entry alone, with no operator or value.
            value = ""
            operator = u""
            pre_condition = T_MISSING_OPERATOR[predicate.operator]
            post_condition = u")"
        elif optype == 'text' or optype == 'items':
            # These splits are not translated: the whole body is
            # discarded.
            return u""
        else:
            if optype == 'numeric':
                value = predicate.value
            else:
                value = repr(predicate.value)
            operator = PYTHON_OPERATOR[predicate.operator]

        conditions.append("%s[%s]%s%s%s" % (
            pre_condition,
            self.fields[predicate.field]['name'],
            operator,
            value,
            post_condition))
        # Children receive copies so that siblings do not see each
        # other's conditions or missing-value bookkeeping.
        body = child.tableau_body(body, conditions[:], cmv=cmv[:],
                                  ids_path=ids_path, subtree=subtree)
        del conditions[-1]
    return body
def python_body(self, depth=1, cmv=None, input_map=False,
                ids_path=None, subtree=True):
    """Translate the model into a set of "if" python statements.

    `depth` controls the size of indentation. `cmv` is the list of field
    slugs already covered by a missing-value check. When `input_map` is
    true the generated conditions read the inputs from a `data` map
    instead of using the slug as a variable name.

    Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`:
    the generated code plus the `(field, term)` pairs found in splits on
    'text' and 'items' fields respectively.
    """

    def map_data(field, missing=False):
        """Returns the subject of the condition in map format when
        `input_map` is set, and the bare field otherwise.
        """
        if not input_map:
            return field
        template = "data.get('%s')" if missing else "data['%s']"
        return template % field

    def node_value():
        """Returns the node output: verbatim for numeric objectives,
        its repr otherwise.
        """
        if self.fields[self.objective_id]['optype'] == 'numeric':
            return self.output
        return repr(self.output)

    if cmv is None:
        cmv = []
    term_analysis_fields = []
    item_analysis_fields = []

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: return the prediction.
        body = u"%sreturn %s\n" % (INDENT * depth, node_value())
        return body, term_analysis_fields, item_analysis_fields

    body = u""
    field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    if (not has_missing_branch
            and self.fields[field]["optype"] not in ["text", "items"]
            and self.fields[field]['slug'] not in cmv):
        body += u"%sif (%s is None):\n" % (
            INDENT * depth, map_data(self.fields[field]['slug'], True))
        body += u"%sreturn %s\n" % (INDENT * (depth + 1), node_value())
        cmv.append(self.fields[field]['slug'])

    # optype -> (list collecting the pairs, name of the matching function)
    matchers = {
        'text': (term_analysis_fields, "term_matches"),
        'items': (item_analysis_fields, "item_matches"),
    }

    for child in children:
        predicate = child.predicate
        field = predicate.field
        info = self.fields[field]

        pre_condition = u""
        if has_missing_branch and predicate.value is not None:
            # Prefix the comparison with an explicit None test.
            if predicate.missing:
                negation, connection = u"", u"or"
            else:
                negation, connection = u" not", u"and"
            pre_condition = u"%s is%s None %s " % (
                map_data(info['slug'], True), negation, connection)
            if not predicate.missing:
                cmv.append(info['slug'])

        optype = info['optype']
        if optype in ('numeric', 'text', 'items') or predicate.value is None:
            value = predicate.value
        else:
            value = repr(predicate.value)

        if optype in matchers:
            collected, matching_function = matchers[optype]
            collected.append((field, predicate.term))
            # NOTE(review): `unicode` is the Python 2 builtin; under
            # Python 3 it has to be defined elsewhere in the module.
            prefix = 'u' if isinstance(predicate.term, unicode) else ''
            body += (
                u"%sif (%s%s(%s, \"%s\", %s\"%s\") %s %s):"
                u"\n" %
                (INDENT * depth,
                 pre_condition,
                 matching_function,
                 map_data(info['slug'], False),
                 info['slug'],
                 prefix,
                 predicate.term.replace("\"", "\\\""),
                 PYTHON_OPERATOR[predicate.operator],
                 value))
        else:
            if predicate.value is None:
                operator = MISSING_OPERATOR[predicate.operator]
                cmv.append(info['slug'])
            else:
                operator = PYTHON_OPERATOR[predicate.operator]
            body += u"%sif (%s%s %s %s):\n" % (
                INDENT * depth,
                pre_condition,
                map_data(info['slug'], False),
                operator,
                value)

        # The child gets its own copy of `cmv`.
        child_body, child_terms, child_items = child.python_body(
            depth + 1, cmv=cmv[:], input_map=input_map,
            ids_path=ids_path, subtree=subtree)
        body += child_body
        term_analysis_fields.extend(child_terms)
        item_analysis_fields.extend(child_items)

    return body, term_analysis_fields, item_analysis_fields
def plug_in_body(self, depth=1, cmv=None, input_map=False,
                 ids_path=None, subtree=True):
    """Translate the model into a set of "if" python statements.

    `depth` controls the size of indentation. `cmv` is the list of field
    slugs for which no separate missing-value check is emitted in this
    node. `input_map`, `ids_path` and `subtree` are forwarded to the
    helpers and to the recursive calls.

    Returns a tuple `(body, term_analysis_fields, item_analysis_fields)`
    where `body` is the generated code for this subtree and the two lists
    collect the entries gathered while generating the split conditions.
    """
    if cmv is None:
        cmv = []
    term_analysis_fields = []
    item_analysis_fields = []

    children = filter_nodes(self.children, ids=ids_path, subtree=subtree)
    if not children:
        # Leaf: return a dict with the prediction and, when the node
        # has one, its probability.
        value = value_to_print(self.output, "numeric")
        body = u"%sreturn {\"prediction\":%s" % (INDENT * depth, value)
        if hasattr(self, "probability"):
            body += u", \"probability\": %s" % self.probability
        body += u"}\n"
        return body, term_analysis_fields, item_analysis_fields

    body = u""
    # field used in the split
    split_field = split(children)
    has_missing_branch = (missing_branch(children) or
                          none_value(children))
    # the missing is singled out as a special case only when there's
    # no missing branch in the children list
    one_branch = (not has_missing_branch or
                  self.fields[split_field]['optype'] in COMPOSED_FIELDS)
    if one_branch and self.fields[split_field]['slug'] not in cmv:
        body += self.missing_check_code(split_field, depth, input_map, cmv)

    for child in children:
        child_field = child.predicate.field
        # code when missing_splits has been used
        if has_missing_branch and child.predicate.value is not None:
            pre_condition = self.missing_prefix_code(
                child, child_field, input_map, cmv)
        else:
            pre_condition = u""

        # complete split condition code
        body += child.split_condition_code(
            child_field, depth, input_map, pre_condition,
            term_analysis_fields, item_analysis_fields)

        # value to be determined in next node; the child gets its own
        # copy of `cmv`
        child_body, child_terms, child_items = child.plug_in_body(
            depth + 1, cmv=cmv[:], input_map=input_map,
            ids_path=ids_path, subtree=subtree)
        body += child_body
        term_analysis_fields.extend(child_terms)
        item_analysis_fields.extend(child_items)

    return body, term_analysis_fields, item_analysis_fields