Beispiel #1
0
    def process_alg_parameters(self, alg):
        """
        Expand an algorithm element on its iteration parameter, if it has one.

        A parameter is treated as an iteration parameter when ``safe_xml_path(par, ['@from'])`` is truthy.
        At most one iteration parameter is expanded. If several are present, an error is logged, the first one
        is kept for expansion and the remaining ones are converted with ``create_iter_elems(param, True)`` and
        appended to the ordinary parameters.

        :param alg: algorithm element; read through its '@name' and 'param' entries
        :return: list of algorithm elements. When there is an iteration parameter, one new element is produced
                 per item returned by ``Config.create_iter_elems``; otherwise the list holds ``alg`` unchanged.
        """
        # Partition the parameters in a single pass. This replaces the Python-2-only
        # ``itertools.ifilter(lambda (par): ...)`` calls and works on both Python 2 and 3.
        iteration_params = []
        other_params = []
        for par in force_list(alg['param']):
            if safe_xml_path(par, ['@from']):
                iteration_params.append(par)
            else:
                other_params.append(par)

        if len(iteration_params) > 1:
            logging.error("Multiple iteration parameters: %s. Ignoring all but first.", alg['@name'])
            # NOTE(review): the True flag presumably collapses the range to a single value -- confirm
            # against create_iter_elems.
            bad_params = [self.create_iter_elems(param, True) for param in iteration_params[1:]]
            iteration_params = iteration_params[0:1]
            other_params = other_params + list(itertools.chain.from_iterable(bad_params))

        # At this point there is at most a single iteration parameter. It could be ill-formed.
        if not iteration_params:
            return [alg]

        iter_elems = Config.create_iter_elems(iteration_params[0], False)
        # Build a fresh parameter list per expansion. list.append() returns None, so the list must be
        # created by concatenation rather than by chaining .append() onto a copy.
        return [{'@name': alg['@name'], 'param': other_params + [iter_elem]}
                for iter_elem in iter_elems]
Beispiel #2
0
    def build_exps(self, config_dict):
        """
        Build the experiment entries from the 'exp' section of the configuration dictionary.

        For every experiment the data, splitter and algorithm references (required) and the xform, metric and
        post references (optional) are resolved against the matching top-level sections of ``config_dict``.
        The algorithm list is then passed through ``expand_iterations``.

        If exactly one algorithm remains, the experiment is stored under its own id. Otherwise one numbered
        sub-experiment ("<eid>-<index>") is stored per algorithm and the original id is mapped in
        ``self._exp_dict`` to a list of ``{'@ref': sub_id}`` references to those sub-experiments.

        :param config_dict: configuration dictionary holding the 'exp' section and the sections referenced from it
        :return: None; results are recorded through ``set_experiment_entry`` and ``self._exp_dict``
        """
        for eid, exp in config_dict['exp'].items():
            # Required elements
            data_ref = exp['data']
            split_ref = exp['splitter']
            alg_refs = force_list(exp['alg'])
            # Optional elements
            xform_ref = exp['xform'] if safe_xml_path(exp, ["xform"]) else None
            metric_refs = force_list(exp['metric']) if safe_xml_path(exp, ["metric"]) else []
            post_refs = force_list(exp['post']) if safe_xml_path(exp, ["post"]) else []

            data_elem = self.resolve_ref(data_ref, config_dict['data'])
            xform_elem = self.resolve_ref(xform_ref, config_dict['xform'])
            split_elem = self.resolve_ref(split_ref, config_dict['splitter'])
            alg_elems = [self.resolve_ref(ref, config_dict['alg']) for ref in alg_refs]
            metric_elems = [self.resolve_ref(ref, config_dict['metric']) for ref in metric_refs]
            post_elems = [self.resolve_ref(ref, config_dict['post-process']) for ref in post_refs]

            alg_elems = self.expand_iterations(alg_elems)

            # A single algorithm keeps the original experiment id.
            if len(alg_elems) == 1:
                self.set_experiment_entry(eid, data_elem, xform_elem, split_elem, alg_elems[0],
                                          metric_elems, post_elems)
                continue

            # Otherwise each algorithm gets its own numbered sub-experiment entry, and the original
            # experiment is replaced by a list of references to them: running it effectively means
            # running the whole collection.
            sub_exp_refs = []
            for index, alg_elem in enumerate(alg_elems):
                subid = "{}-{}".format(eid, index)
                self.set_experiment_entry(subid, data_elem, xform_elem, split_elem, alg_elem,
                                          metric_elems, post_elems)
                sub_exp_refs.append({'@ref': subid})
            self._exp_dict[eid] = sub_exp_refs
 def test_inherit_param(self):
     """Check that experiment 'FT-IB-err' has an alg element whose parameters include 'rec.similarity.class'."""
     self.conf.from_file("conf/test-properties1.xml")
     experiment = self.conf.get_experiment('FT-IB-err')

     self.assertTrue('alg' in experiment, "Missing alg element in experiment")

     has_params = utils.safe_xml_path(experiment, ['alg', 'param'])
     self.assertTrue(has_params, "Missing parameters in experiment")

     inherited = any(param['@name'] == 'rec.similarity.class'
                     for param in experiment['alg']['param'])
     self.assertTrue(inherited, "Parameter not inherited for algorithm in experiment")
Beispiel #4
0
    def expand_iterations(self, alg_list):
        """
        Flatten a list of algorithm elements, expanding those that carry parameters.

        Every algorithm for which ``safe_xml_path(alg, ["param"])`` is truthy is handed to
        ``process_alg_parameters`` and replaced by the algorithms that call returns. Algorithms without
        parameters are passed through unchanged. Input order is preserved.

        :param alg_list: iterable of algorithm elements
        :return: flat list of algorithm elements after expansion
        """
        expanded = []
        for alg in alg_list:
            if not safe_xml_path(alg, ["param"]):
                # Nothing to expand; keep the algorithm as-is.
                expanded.append(alg)
                continue
            expanded.extend(self.process_alg_parameters(alg))

        return expanded
Beispiel #5
0
    def create_named(element, configs):
        """
        Collect the chunks of one element type from several configuration dictionaries, keyed by id.

        Each configuration is searched under ``Config._XML_HEAD`` for ``element``. A chunk that has an '@id'
        entry is stored under that id; a chunk without one is stored under a freshly generated id from
        ``Config.get_new_id()``. When the same id occurs more than once, the later chunk overwrites the earlier.

        :param element: An element name. Expected to be from the _ELEMENTS list
        :param configs: A list of configuration dictionaries
        :return: dictionary mapping ids to element chunks from the configuration dictionaries
        """
        named = {}

        for config in configs:
            if not safe_xml_path(config, [Config._XML_HEAD, element]):
                # This configuration does not define the element at all.
                continue
            for chunk in force_list(config[Config._XML_HEAD][element]):
                key = chunk['@id'] if '@id' in chunk else Config.get_new_id()
                named[key] = chunk

        return named
Beispiel #6
0
 def get_metric(self, eid):
     """Return the metric element stored under ``eid``; log an error and return ``{}`` if there is none."""
     if not safe_xml_path(self._config_dict, ['metric', eid]):
         logging.error("Reference to unknown metric element %s", eid)
         return {}
     return self._config_dict['metric'][eid]
Beispiel #7
0
 def get_algorithm(self, eid):
     """Return the algorithm element stored under ``eid``; log an error and return ``{}`` if there is none."""
     if not safe_xml_path(self._config_dict, ['alg', eid]):
         logging.error("Reference to unknown algorithm element %s", eid)
         return {}
     return self._config_dict['alg'][eid]
Beispiel #8
0
 def get_splitter(self, eid):
     """Return the splitter element stored under ``eid``; log an error and return ``{}`` if there is none."""
     if not safe_xml_path(self._config_dict, ['splitter', eid]):
         logging.error("Reference to unknown splitter element %s", eid)
         return {}
     return self._config_dict['splitter'][eid]
Beispiel #9
0
 def get_xform(self, eid):
     """Return the xform element stored under ``eid``; log an error and return ``{}`` if there is none."""
     if not safe_xml_path(self._config_dict, ['xform', eid]):
         logging.error("Reference to unknown xform element %s", eid)
         return {}
     return self._config_dict['xform'][eid]
Beispiel #10
0
 def get_data(self, eid):
     """Return the data element stored under ``eid``; log an error and return ``{}`` if there is none."""
     if not safe_xml_path(self._config_dict, ['data', eid]):
         logging.error("Reference to unknown data element %s", eid)
         return {}
     return self._config_dict['data'][eid]
Beispiel #11
0
 def get_post_process(self, eid):
     """Return the post-process element stored under ``eid``; log an error and return ``{}`` if there is none."""
     if not safe_xml_path(self._config_dict, ['post-process', eid]):
         logging.error("Reference to unknown post-process element %s", eid)
         return {}
     return self._config_dict['post-process'][eid]
 def test_xform(self):
     """Check that experiment 'FT-IB-err' has an xform element with a 'zero-action' entry."""
     self.conf.from_file("conf/test-properties1.xml")
     experiment = self.conf.get_experiment('FT-IB-err')

     self.assertTrue('xform' in experiment, "Missing xform element in experiment")

     has_zero_action = utils.safe_xml_path(experiment, ['xform', 'zero-action'])
     self.assertTrue(has_zero_action, "Missing parameters in xform element")