def node_from_yaml(layer_spec):
    """ Load the specs and initialize the layer nodes """
    assert("parameters" in layer_spec
           and "class_labels" in layer_spec["parameters"]
           and "node" in layer_spec["parameters"]), \
        "Node requires specification of a node and classification labels!"
    scheme = layer_spec["parameters"].pop("scheme", "1vs1")
    # Create all nodes that are packed together in this layer
    layer_nodes = []
    node_spec = layer_spec["parameters"]["node"][0]
    classes = layer_spec["parameters"]["class_labels"]
    if scheme == '1vR':
        # one-vs-rest: instantiate the template once per class label
        for label in classes:
            node_obj = BaseNode.node_from_yaml(
                NodeChainFactory.instantiate(node_spec, {"LABEL": label}))
            layer_nodes.append(node_obj)
    else:
        # default 1vs1: instantiate once per unordered pair of class labels
        n = len(classes)
        for i in range(n - 1):
            for j in range(i + 1, n):
                replace_dict = {"LABEL1": classes[i], "LABEL2": classes[j]}
                node_obj = BaseNode.node_from_yaml(
                    NodeChainFactory.instantiate(node_spec, replace_dict))
                layer_nodes.append(node_obj)
    layer_spec["parameters"].pop("node")
    layer_spec["parameters"].pop("class_labels")
    # Create the node object
    node_obj = MultiClassLayerNode(nodes=layer_nodes,
                                   **layer_spec["parameters"])
    return node_obj

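# Example: a minimal sketch of a layer_spec as it might look after YAML
# parsing. The node name "GaussianClassifier" and the label values are
# hypothetical; with the default "1vs1" scheme the template is instantiated
# once per unordered pair of class labels, with "LABEL1"/"LABEL2" substituted
# by NodeChainFactory.instantiate.
#
# layer_spec = {
#     "node": "Multi_Class_Layer",
#     "parameters": {
#         "class_labels": ["Target", "Standard", "Artifact"],
#         "scheme": "1vs1",
#         "node": [{"node": "GaussianClassifier",
#                   "parameters": {"class_labels": ["LABEL1", "LABEL2"]}}],
#     },
# }
# layer_node = node_from_yaml(layer_spec)  # -> 3 pairwise classifier nodes
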
def node_from_yaml(node_spec):
    """ Create the node based on the node_spec """
    node_spec = copy.deepcopy(node_spec)
    # call parent class method for most of the work
    node_spec["parameters"], flow_template = \
        ParameterOptimizationBase.check_parameters(node_spec["parameters"])
    # check grid search specific params
    optimization = node_spec["parameters"].pop("optimization")
    assert("ranges" in optimization), "Grid Search needs *ranges* parameter"
    BaseNode.eval_dict(optimization)
    node_obj = GridSearchNode(ranges=optimization["ranges"],
                              flow_template=flow_template,
                              **node_spec["parameters"])
    return node_obj

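# Example: a hypothetical node_spec for the grid-search factory above; the
# "~~complexity~~" placeholder syntax and the node names are illustrative,
# not verified against the node registry. check_parameters() consumes
# "nodes", "validation_set" and "evaluation" and builds flow_template; the
# "ranges" dict then spans the search grid, one flow per grid point.
#
# node_spec = {
#     "node": "Grid_Search",
#     "parameters": {
#         "optimization": {"ranges": {"~~complexity~~": [0.01, 0.1, 1.0]}},
#         "nodes": [{"node": "SVM_Classifier",
#                    "parameters": {"complexity": "~~complexity~~"}}],
#         "validation_set": {"splits": 5},
#         "evaluation": {"ir_class": "Target"},
#     },
# }
# grid_node = node_from_yaml(node_spec)
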
def node_from_yaml(node_spec):
    """ Create the node based on the node_spec """
    node_spec = copy.deepcopy(node_spec)
    # call parent class method for most of the work
    node_spec["parameters"], flow_template = \
        ParameterOptimizationBase.check_parameters(node_spec["parameters"])
    if "optimization" in node_spec["parameters"]:
        BaseNode.eval_dict(node_spec["parameters"]["optimization"])
        # since pattern search specific params are all optional, add them to
        # **kwargs and let the __init__ do the default assignments
        for key, value in \
                node_spec["parameters"].pop("optimization").iteritems():
            node_spec["parameters"][key] = value
    node_obj = PatternSearchNode(flow_template=flow_template,
                                 **node_spec["parameters"])
    return node_obj

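# Example: a hypothetical node_spec for the pattern-search factory above.
# All "optimization" entries are optional and are merely forwarded as
# keyword arguments, so __init__ supplies the defaults; the key names shown
# here ("start", "start_step_size") are assumptions, not a verified API.
#
# node_spec = {
#     "node": "Pattern_Search",
#     "parameters": {
#         "optimization": {"start": [0.1], "start_step_size": 0.5},
#         "nodes": [{"node": "SVM_Classifier", "parameters": {}}],
#         "evaluation": {"ir_class": "Target"},
#     },
# }
# search_node = node_from_yaml(node_spec)
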
def _prepare_node_chain(nodes_spec):
    """ Creates the FlowNode node and the contained chain based on the node_spec """
    assert "parameters" in nodes_spec
    if "load_path" in nodes_spec["parameters"]:
        # Let node load pickled object
        return nodes_spec["parameters"]
    else:
        # The node chain has to be specified in YAML syntax
        assert "nodes" in nodes_spec["parameters"], \
            "FlowNode requires specification of a list of nodes " \
            "or of a load_path to a pickled node chain."
        node_sequence = [ExternalGeneratorSourceNode(),
                         AllTrainSplitterNode()]
        # For all nodes in the specs
        for node_spec in nodes_spec["parameters"]["nodes"]:
            # Use factory method to create node
            node_obj = BaseNode.node_from_yaml(node_spec)
            # Append this node to the sequence of nodes
            node_sequence.append(node_obj)
        # Check if the nodes have to cache their outputs
        for index, node in enumerate(node_sequence):
            # If a node is trainable, it uses the outputs of its input node
            # at least twice, so we have to cache.
            if node.is_trainable():
                node_sequence[index - 1].set_permanent_attributes(caching=True)
            # Split nodes might also request the data from their input nodes
            # (once for each split), depending on their implementation. We
            # assume the worst case and activate caching.
            if node.is_split_node():
                node_sequence[index - 1].set_permanent_attributes(caching=True)
        # Determine if any of the nodes is trainable
        trainable = reduce(operator.or_,
                           [node.is_trainable() for node in node_sequence])
        # Determine if any of the nodes requires supervised training
        supervised = reduce(operator.or_,
                            [node.is_supervised() for node in node_sequence])
        # Create the node chain
        flow = NodeChain(node_sequence)
        nodes_spec["parameters"].pop("nodes")
        # Evaluate all remaining parameters if they are eval statements
        for key, value in nodes_spec["parameters"].iteritems():
            if isinstance(value, basestring) and value.startswith("eval("):
                nodes_spec["parameters"][key] = eval(value[5:-1])
        # Create the node object
        member_dict = copy.deepcopy(nodes_spec["parameters"])
        member_dict["nodes"] = flow
        member_dict["trainable"] = trainable
        member_dict["supervised"] = supervised
        return member_dict

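# Example: a hypothetical nodes_spec for the chain preparation above. The
# inner node name "Noop" is illustrative; the returned member_dict feeds the
# FlowNode constructor, with "eval(...)" string parameters evaluated in
# place (here "eval(2 ** 3)" becomes the integer 8).
#
# nodes_spec = {
#     "node": "Flow_Node",
#     "parameters": {
#         "nodes": [{"node": "Noop", "parameters": {}}],
#         "some_parameter": "eval(2 ** 3)",
#     },
# }
# member_dict = _prepare_node_chain(nodes_spec)
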
def _stop_training(self, debug=False):
    n0 = self.labels.count(self.classes[0])
    n1 = self.labels.count(self.classes[1])
    if n0 > n1:
        # the n0 samples are divided into packages of size n1
        num = n0 // n1
        self.nodes = []
        # initialization of the necessary classifier nodes
        for j in range(num):
            self.nodes.append(BaseNode.node_from_yaml(self.classifier))
            # self.classifier[0]['parameters']['weight'] = self.weight  # addon
            # self.nodes.append(BaseNode.node_from_yaml(self.classifier[0]))  # addon
        k = 0
        for i in range(len(self.samples)):
            if self.labels[i] == self.classes[1]:
                # underrepresented class is sent to all classifiers
                for classifier in self.nodes:
                    classifier.train(self.samples[i], self.labels[i])
            else:
                # feed into k-th classifier
                self.nodes[k].train(self.samples[i], self.labels[i])
                k = (k + 1) % num
                # self.nodes[num].train(self.samples[i], self.labels[i])  # addon
    else:
        # the n1 samples are divided into packages of size n0
        num = n1 // n0
        self.nodes = []
        # initialization of the necessary classifier nodes
        for j in range(num):
            self.nodes.append(BaseNode.node_from_yaml(self.classifier))
        k = 0
        for i in range(len(self.samples)):
            if self.labels[i] == self.classes[0]:
                # underrepresented class is sent to all classifiers
                for classifier in self.nodes:
                    classifier.train(self.samples[i], self.labels[i])
            else:
                # feed into k-th classifier
                self.nodes[k].train(self.samples[i], self.labels[i])
                k = (k + 1) % num
    for classifier in self.nodes:
        classifier.stop_training(debug)
    # This should be calculated more exactly.
    self.num_retained_features = "differs maybe"  # self.nodes[0].num_retained_features
    self.complexity = "differs"  # self.nodes[0].complexity

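# Example: a minimal sketch of the package scheme above, assuming 6 samples
# of the overrepresented class and 2 of the underrepresented one. Then
# num = 6 // 2 = 3 sub-classifiers are created; each receives both minority
# samples plus (roughly) a third of the majority samples via round robin.
#
# majority = ["a1", "a2", "a3", "a4", "a5", "a6"]
# minority = ["b1", "b2"]
# num = len(majority) // len(minority)               # 3 packages
# packages = [majority[k::num] for k in range(num)]  # round-robin split
# # packages == [['a1', 'a4'], ['a2', 'a5'], ['a3', 'a6']]
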
def _train(self, data, class_label):
    """ It is assumed that the class_label parameter contains
    information about the true class the data belongs to
    """
    self._train_phase_started = True
    # lazy initialization of the wrapped classifier node
    if self.nodes is None:
        self.nodes = [BaseNode.node_from_yaml(self.classifier)]
        self.nodes[0].complexity = self.complexity
    self.nodes[0].train(data, class_label)

def check_parameters(param_spec):
    """ Check input parameters for existence and appropriateness """
    assert("nodes" in param_spec and "optimization" in param_spec), \
        "Parameter Optimization node requires specification of a " \
        "list of nodes and optimization algorithm!"
    validation_set = param_spec.pop("validation_set", {})
    validation_set["splits"] = validation_set.get("splits", 5)
    validation_set["split_node"] = validation_set.get(
        "split_node",
        {'node': 'CV_Splitter',
         'parameters': {'splits': validation_set["splits"]}})
    evaluation = param_spec.pop("evaluation", {})
    evaluation["ir_class"] = evaluation.get("ir_class", "Target")
    evaluation["performance_sink_node"] = evaluation.get(
        "performance_sink_node",
        {'node': 'Classification_Performance_Sink',
         'parameters': {'ir_class': evaluation["ir_class"]}})
    # build flow template
    nodes_spec = param_spec.pop("nodes")
    flow_template = [{'node': 'External_Generator_Source_Node'},
                     validation_set["split_node"]]
    flow_template.extend(nodes_spec)
    flow_template.append(evaluation["performance_sink_node"])
    # Evaluate all remaining parameters
    BaseNode.eval_dict(param_spec)
    # params with defaults in __init__ have to be added to param_spec dict
    if "runs" in validation_set:
        param_spec["runs"] = validation_set["runs"]
    if "metric" in evaluation:
        param_spec["metric"] = evaluation["metric"]
    if "inverse_metric" in evaluation:
        param_spec["inverse_metric"] = evaluation["inverse_metric"]
    if "std_weight" in evaluation:
        param_spec["std_weight"] = evaluation["std_weight"]
    return param_spec, flow_template

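# Example: a hypothetical param_spec as it might arrive from the YAML
# parser. Key names under "validation_set"/"evaluation" follow the defaults
# handled above; the node name, metric name and placeholder syntax are
# illustrative.
#
# param_spec = {
#     "nodes": [{"node": "SVM_Classifier", "parameters": {}}],
#     "optimization": {"ranges": {"~~complexity~~": [0.1, 1.0]}},
#     "validation_set": {"splits": 10, "runs": 2},
#     "evaluation": {"ir_class": "Target", "metric": "Balanced_accuracy"},
# }
# param_spec, flow_template = check_parameters(param_spec)
# # flow_template now chains: source -> CV_Splitter -> SVM_Classifier -> sink
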
def node_from_yaml(node_spec):
    """ Creates a node based on the node_spec to overwrite default """
    # This node requires one parameter, namely the wrapped node
    assert("parameters" in node_spec
           and "wrapped_node" in node_spec["parameters"]), \
        "ConsumeTrainingDataNode requires specification of a wrapped node!"
    # Create the wrapped node
    wrapped_node = BaseNode.node_from_yaml(
        node_spec["parameters"]["wrapped_node"])
    node_spec["parameters"].pop("wrapped_node")
    # Create the node object
    node_obj = ConsumeTrainingDataNode(wrapped_node=wrapped_node,
                                       **node_spec["parameters"])
    return node_obj

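# Example: a hypothetical node_spec for wrapping a classifier. The
# "consumption_rate" entry is an assumption about what the constructor
# accepts via **kwargs; the wrapped node name is illustrative.
#
# node_spec = {
#     "node": "Consume_Training_Data",
#     "parameters": {
#         "wrapped_node": {"node": "SVM_Classifier", "parameters": {}},
#         "consumption_rate": 0.8,  # assumed optional constructor parameter
#     },
# }
# node = node_from_yaml(node_spec)
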
def node_from_yaml(layer_spec):
    """ Load the specs and initialize the layer nodes """
    # This node requires one parameter, namely a list of nodes
    assert("parameters" in layer_spec
           and "nodes" in layer_spec["parameters"]), \
        "SameInputLayerNode requires specification of a list of nodes!"
    # Create all nodes that are packed together in this layer
    layer_nodes = []
    for node_spec in layer_spec["parameters"]["nodes"]:
        node_obj = BaseNode.node_from_yaml(node_spec)
        layer_nodes.append(node_obj)
    layer_spec["parameters"].pop("nodes")
    # Create the node object
    node_obj = SameInputLayerNode(nodes=layer_nodes,
                                  **layer_spec["parameters"])
    return node_obj

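# Example: a hypothetical layer_spec feeding the same input to two parallel
# nodes; the node names are illustrative only.
#
# layer_spec = {
#     "node": "Same_Input_Layer",
#     "parameters": {
#         "nodes": [{"node": "FFT_Band_Pass_Filter", "parameters": {}},
#                   {"node": "Time_Domain_Features", "parameters": {}}],
#     },
# }
# layer_node = node_from_yaml(layer_spec)  # both nodes see identical input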