Exemple #1
0
            yield config


if __name__ == '__main__':
    import json
    import logging
    import sys

    from docopt import docopt

    logging.basicConfig(level=logging.DEBUG)

    # Command-line arguments are parsed from the module docstring.
    cli_args = docopt(__doc__)
    testdata_path = cli_args[u"<testdata.csv>"]
    db_name = cli_args[u"<dbname>"]

    # Materialise every generated configuration up front so the whole set can
    # be checked before anything is written out.
    configs = list(iter_configs(testdata_path, db_name))

    # Sanity check: make sure the configurations look ok.
    # Each of these is required to be included in some config.
    required_patterns = [
        {u"classifier": u"logit", u"feature_selection": None, u"scaler": True},
    ]
    # Each of these is required to be excluded from all configs.
    forbidden_patterns = [
        {u"feature_selection": u"dtree", u"classifier": u"dtree"},
        {u"feature_selection": u"kbest", u"classifier": u"dtree"},
    ]
    # Handed to check_configs as ``always``; presumably the keys every config
    # must carry -- confirm against check_configs.
    always_keys = ["config_version", "data_shuffle_seed", "train_percentage"]
    check_configs(configs, required_patterns, forbidden_patterns,
                  always=always_keys)

    # Emit the configurations to stdout as sorted, 4-space indented JSON.
    json.dump(configs, sys.stdout, indent=4, sort_keys=True,
              separators=(u',', u': '))
Exemple #2
0
                max_score = max(config[u'prediction_config']['scale_to_range'])
            config[u'fact_threshold'] = max_score - config.pop(u'fact_threshold_distance')
            config[u'evidence_threshold'] = max_score - config.pop(u'evidence_threshold_distance')
            if (config[u'classifier_config'][u'classifier'] == u'svm' and
                config[u'prediction_config'][u'method'] == u'predict_proba'):
                # we'll split this config in 2 options: actual predict_proba,
                # and decision_function
                config_copied = deepcopy(config)
                config_copied[u'classifier_config'][u'classifier_args'][u'probability'] = True
                yield config_copied
                # http://scikit-learn.org/stable/modules/svm.html#scores-and-probabilities
                config[u'classifier_config'][u'classifier_args'][u'probability'] = False
                config[u'prediction_config'][u'method'] = u'decision_function'
            yield config


if __name__ == '__main__':
    import json
    import logging
    import sys

    from docopt import docopt

    logging.basicConfig(level=logging.INFO)

    # Command-line arguments are parsed from the module docstring.
    cli_args = docopt(__doc__)
    testdata_path = cli_args[u'<testdata.csv>']
    db_name = cli_args[u'<dbname>']

    # Build the complete list of configurations, then run the checks on it
    # before anything is written out.
    configs = list(iter_configs(testdata_path, db_name))
    check_configs(configs, estimated_minutes_per_config=1.5)

    # Emit the configurations to stdout as sorted, 4-space indented JSON.
    json.dump(configs, sys.stdout, indent=4, sort_keys=True,
              separators=(u',', u': '))