Ejemplo n.º 1
0
def selectBestModel(project_file, results_model_file):
    """Write an HTML report for the best classification result of a project.

    The project description is read from ``project_file`` (YAML). When the
    project's results directory exists, the most accurate result over all
    classifier types is selected, its accuracy and confusion matrix are
    written to ``<results_model_file>.results.html``, ``trainSVMHistory`` is
    invoked with the matching parameter file, and that parameter file is
    copied to ``<results_model_file>.param``. When the directory does not
    exist, a "Results not found" report is written instead.

    Args:
        project_file: path to the YAML project file; it must provide the
            ``className`` and ``resultsDirectory`` keys.
        results_model_file: base path to which the ``.results.html`` and
            ``.param`` suffixes are appended.
    """
    # The report is opened first (as before) but is now closed on every exit
    # path, including exceptions raised while processing the results.
    with open(results_model_file + '.results.html', 'w') as f:
        with open(project_file, 'r') as project_stream:
            # NOTE(review): yaml.load can build arbitrary Python objects;
            # switch to yaml.safe_load if project files may be untrusted.
            project = yaml.load(project_stream)
        className = project['className']
        results_dir = project['resultsDirectory']

        if not os.path.exists(results_dir):
            print("RESULT " + "No results found for ", project_file,
                  ": cannot build a model")
            # The original formatted an undefined name here, which raised
            # NameError instead of writing the report.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' %
                    (className, project_file))
            return

        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        accuracy, filename, _ = cr.best(1, classifierType)[0]
        print("RESULT " + project_file + '\t' + str(accuracy) + '\t' +
              filename)

        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' %
                (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # Swap only the extension: str.replace would also rewrite any
        # '.result' occurring earlier in the path (e.g. a 'foo.results' dir).
        filename = os.path.splitext(filename)[0] + '.param'

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
Ejemplo n.º 2
0
def selectBestModel(project_file, results_model_file):
    """Write an HTML report for the best classification result of a project.

    The project description is read from ``project_file`` (YAML). When the
    project's results directory exists, the most accurate result over all
    classifier types is selected, its accuracy and confusion matrix are
    written to ``<results_model_file>.results.html``, ``trainSVMHistory`` is
    invoked with the matching parameter file, and that parameter file is
    copied to ``<results_model_file>.param``. When the directory does not
    exist, a "Results not found" report is written instead.

    Args:
        project_file: path to the YAML project file; it must provide the
            ``className`` and ``resultsDirectory`` keys.
        results_model_file: base path to which the ``.results.html`` and
            ``.param`` suffixes are appended.
    """
    # Every print below takes a single parenthesised string, which produces
    # the same output as a Python 2 statement and as the Python 3 function.

    # The report is opened first (as before) but is now closed on every exit
    # path, including exceptions raised while processing the results.
    with open(results_model_file + '.results.html', 'w') as f:
        with open(project_file, 'r') as project_stream:
            # NOTE(review): yaml.load can build arbitrary Python objects;
            # switch to yaml.safe_load if project files may be untrusted.
            project = yaml.load(project_stream)
        className = project['className']
        results_dir = project['resultsDirectory']

        if not os.path.exists(results_dir):
            # Same text the comma-separated print statement produced
            # (the double space after "for" is intentional).
            print("RESULT No results found for  %s : cannot build a model"
                  % project_file)
            # The original formatted an undefined name here, which raised
            # NameError instead of writing the report.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' % (className, project_file))
            return

        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        accuracy, filename, _ = cr.best(1, classifierType)[0]
        print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)

        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' % (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # Swap only the extension: str.replace would also rewrite any
        # '.result' occurring earlier in the path (e.g. a 'foo.results' dir).
        filename = os.path.splitext(filename)[0] + '.param'

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
Ejemplo n.º 3
0
    def testAccuraciesNFold(self):
        """Accuracy over two folds (100% and 0%) must be their average."""
        # Arguments handed to populateFold for each fold, in fold order.
        fold_counts = (
            (1, 0, 1, 0),  # fold 0: acc = 100%
            (0, 1, 0, 1),  # fold 1: acc = 0%
        )

        matrix = ConfusionMatrix()
        for fold_index, counts in enumerate(fold_counts):
            matrix = self.populateFold(matrix, *counts, fold=fold_index)

        # The overall accuracy is expected to be the mean of the fold values.
        self.assertEqual(matrix.accuracy(), 50.0)
Ejemplo n.º 4
0
    def testNormalizedAccuracy(self):
        """Raw and class-normalized accuracy differ on an unbalanced matrix."""
        # Consider the following confusion matrix:
        #      A   B
        #  A | 3 | 1 |
        #  B | 6 | 2 |
        # Raw accuracy is 5 / 12 ~= 41.67%.
        # Normalized accuracy instead divides each class contribution by the
        # class population (A: 3 / 4, B: 2 / 8), which averages to 50%.
        matrix = self.populateFold(ConfusionMatrix(), 3, 1, 2, 6, fold=0)

        expected_raw = 100 * 5. / 12
        self.assertEqual(matrix.accuracy(), expected_raw)
        self.assertEqual(matrix.normalizedAccuracy(), 50.0)
Ejemplo n.º 5
0
def selectBestModel():
    """Command-line entry point: report the best result of a project.

    Expects two positional arguments, ``project_file`` and
    ``results_model_file``; prints the usage text and exits with status 1
    when either is missing. The project description is read from
    ``project_file`` (YAML). When the project's results directory exists,
    the most accurate result over all classifier types is selected, its
    accuracy and confusion matrix are written to
    ``<results_model_file>.results.html``, ``trainSVMHistory`` is invoked
    with the matching parameter file, and that parameter file is copied to
    ``<results_model_file>.param``. Otherwise a "Results not found" report
    is written.
    """
    # Every print below takes a single parenthesised string, which produces
    # the same output as a Python 2 statement and as the Python 3 function.
    parser = OptionParser(
        usage='%prog [options] project_file results_model_file')

    _, args = parser.parse_args()

    try:
        project_file = args[0]
        results_model_file = args[1]
    except IndexError:
        # Only a missing positional argument is expected here; a bare
        # except would also hide unrelated errors.
        parser.print_help()
        sys.exit(1)

    # The report is opened first (as before) but is now closed on every exit
    # path, including exceptions raised while processing the results.
    with open(results_model_file + '.results.html', 'w') as f:
        with open(project_file, 'r') as project_stream:
            # NOTE(review): yaml.load can build arbitrary Python objects;
            # switch to yaml.safe_load if project files may be untrusted.
            project = yaml.load(project_stream)
        className = project['className']
        results_dir = project['resultsDirectory']

        if not os.path.exists(results_dir):
            # Same text the comma-separated print statement produced
            # (the double space after "for" is intentional).
            print("RESULT No results found for  %s : cannot build a model"
                  % project_file)
            # The original formatted an undefined name here, which raised
            # NameError instead of writing the report.
            f.write('<h1>%s (%s) </h1>\nResults not found\n' %
                    (className, project_file))
            return

        classifierType = None  # all types

        cr = ClassificationResults()
        print('Loading all results...')
        cr.readResults(results_dir)

        accuracy, filename, _ = cr.best(1, classifierType)[0]
        print("RESULT " + project_file + '\t' + str(accuracy) + '\t' + filename)

        f.write('<h1>%s (%s)</h1>\nAccuracy: %s\n' %
                (className, project_file, accuracy))

        cm = ConfusionMatrix()
        cm.load(filename)
        f.write(cm.toHtml())

        # Swap only the extension: str.replace would also rewrite any
        # '.result' occurring earlier in the path (e.g. a 'foo.results' dir).
        filename = os.path.splitext(filename)[0] + '.param'

        trainSVMHistory(project_file, filename, results_model_file, className)
        shutil.copyfile(filename, results_model_file + '.param')
Ejemplo n.º 6
0
    def readResults(self, dir):
        """Load every ``*.result`` file found in ``dir`` with its parameters.

        For each result file a ConfusionMatrix is loaded, the sibling
        ``.param`` file (same base name) is parsed as YAML, and the tuple
        ``(filename, cm, params)`` is appended to ``self.results``. A text
        progress indicator is advanced after each file.

        Args:
            dir: directory to scan for ``*.result`` files. The name shadows
                the builtin but is kept so keyword callers keep working.
        """
        resultFiles = glob.glob(join(dir, '*.result'))
        progress = TextProgress(len(resultFiles))

        # Count from 1 so the index is directly the number of files done.
        for done, filename in enumerate(resultFiles, 1):
            cm = ConfusionMatrix()
            cm.load(filename)

            paramFile = splitext(filename)[0] + '.param'
            # Close each parameter file as soon as it is read; the original
            # leaked one open handle per result file.
            with open(paramFile) as paramStream:
                # NOTE(review): yaml.load can build arbitrary Python objects;
                # switch to yaml.safe_load if these files may be untrusted.
                params = yaml.load(paramStream.read())

            self.results.append((filename, cm, params))

            progress.update(done)
Ejemplo n.º 7
0
    def readResults(self, dir):
        """Load every ``*.result`` file found in ``dir`` with its parameters.

        For each result file a ConfusionMatrix is loaded, the sibling
        ``.param`` file (same base name) is parsed as YAML, and the tuple
        ``(filename, cm, params)`` is appended to ``self.results``. A text
        progress indicator is advanced after each file.

        Args:
            dir: directory to scan for ``*.result`` files. The name shadows
                the builtin but is kept so keyword callers keep working.
        """
        resultFiles = glob.glob(join(dir, '*.result'))
        progress = TextProgress(len(resultFiles))

        # Count from 1 so the index is directly the number of files done.
        for done, filename in enumerate(resultFiles, 1):
            cm = ConfusionMatrix()
            cm.load(filename)

            paramFile = splitext(filename)[0] + '.param'
            # Close each parameter file as soon as it is read; the original
            # leaked one open handle per result file.
            with open(paramFile) as paramStream:
                # NOTE(review): yaml.load can build arbitrary Python objects;
                # switch to yaml.safe_load if these files may be untrusted.
                params = yaml.load(paramStream.read())

            self.results.append((filename, cm, params))

            progress.update(done)
Ejemplo n.º 8
0
    def testStdNfold(self):
        """stdNfold must match the analytic deviation of the fold accuracies."""
        # Arguments handed to populateFold for each fold, in fold order.
        fold_counts = (
            (0, 1, 0, 1),  # fold 0: acc = 0%
            (1, 9, 1, 9),  # fold 1: acc = 10%
            (2, 8, 2, 8),  # fold 2: acc = 20%
        )

        matrix = ConfusionMatrix()
        for fold_index, counts in enumerate(fold_counts):
            matrix = self.populateFold(matrix, *counts, fold=fold_index)

        # Each fold's contribution to the std is its squared distance from
        # the mean accuracy (10%).
        squared_deviations = [dev * dev for dev in (-10., 0., 10.)]
        fold_total = 3.
        expected_std = sqrt(sum(squared_deviations) / fold_total)

        self.assertEqual(matrix.stdNfold(), expected_std)
def select_best_model(project_dir):
    """Selects most accurate classifier parameters for the specified project.

    Args:
        project_dir: Path to the project directory. The project description
            is read from the file named PROJECT_FILE_NAME inside it (YAML),
            and the history file is written into the same directory.

    Returns:
        Dictionary that contains information about best model for the dataset:
            - parameters: classifier parameters for selected model;
            - accuracy: accuracy of selected model, rounded to 2 decimals;
            - confusion_matrix: simplified version of confusion matrix for
                selected model (entry counts instead of the entries).
            - history_path: path to the history file generated using returned
                set of parameters for the best model.
    """
    with open(os.path.join(project_dir, PROJECT_FILE_NAME)) as project_file:
        # NOTE(review): yaml.load can build arbitrary Python objects; switch
        # to yaml.safe_load if project files may come from untrusted sources.
        project = yaml.load(project_file)

    classifier_name = project["className"]
    results = ClassificationResults()
    results.readResults(project["resultsDirectory"])
    # Best single result; None places no restriction on the classifier type.
    best_accuracy, best_result_file, best_params = results.best(1, None)[0]

    cm = ConfusionMatrix()
    cm.load(best_result_file)
    # Keep only how many entries each cell holds, not the entries themselves.
    simplified_cm = {
        key: {
            predicted_key: len(predicted_val)
            for predicted_key, predicted_val in val.items()
        }
        for key, val in cm.matrix.items()
    }

    history_file_path = os.path.join(project_dir,
                                     "%s.history" % classifier_name)
    train_svm_history(project, best_params, history_file_path)

    return {
        "parameters": best_params,
        "accuracy": round(best_accuracy, 2),
        "confusion_matrix": simplified_cm,
        "history_path": history_file_path,
    }
def select_best_model(project_dir):
    """Selects most accurate classifier parameters for the specified project.

    Args:
        project_dir: Path to the project directory. The project description
            is read from the file named PROJECT_FILE_NAME inside it (YAML),
            and the history file is written into the same directory.

    Returns:
        Dictionary that contains information about best model for the dataset:
            - parameters: classifier parameters for selected model;
            - accuracy: accuracy of selected model, rounded to 2 decimals;
            - confusion_matrix: simplified version of confusion matrix for
                selected model (entry counts instead of the entries).
            - history_path: path to the history file generated using returned
                set of parameters for the best model.
    """
    with open(os.path.join(project_dir, PROJECT_FILE_NAME)) as project_file:
        # NOTE(review): yaml.load can build arbitrary Python objects; switch
        # to yaml.safe_load if project files may come from untrusted sources.
        project = yaml.load(project_file)

    classifier_name = project["className"]
    results = ClassificationResults()
    results.readResults(project["resultsDirectory"])
    # Best single result; None places no restriction on the classifier type.
    best_accuracy, best_result_file, best_params = results.best(1, None)[0]

    cm = ConfusionMatrix()
    cm.load(best_result_file)
    # Keep only how many entries each cell holds, not the entries themselves.
    simplified_cm = {
        key: {
            predicted_key: len(predicted_val)
            for predicted_key, predicted_val in val.items()
        }
        for key, val in cm.matrix.items()
    }

    history_file_path = os.path.join(project_dir, "%s.history" % classifier_name)
    train_svm_history(project, best_params, history_file_path)

    return {
        "parameters": best_params,
        "accuracy": round(best_accuracy, 2),
        "confusion_matrix": simplified_cm,
        "history_path": history_file_path,
    }
Ejemplo n.º 11
0
    def testStdNfoldNormalizedAccuracies(self):
        """stdNfold on normalized accuracies must match the analytic value."""
        # Same scenario as the raw-accuracy std test, but built so that only
        # the normalized per-fold accuracies are 0%, 10% and 20%.
        fold_counts = (
            (0, 1, 0, 1),    # fold 0: normalized acc = 0%
            (4, 16, 0, 10),  # fold 1: normalized acc = 10%
            (2, 8, 2, 8),    # fold 2: normalized acc = 20%
        )

        matrix = ConfusionMatrix()
        for fold_index, counts in enumerate(fold_counts):
            matrix = self.populateFold(matrix, *counts, fold=fold_index)

        # Each fold's contribution to the std is its squared distance from
        # the mean normalized accuracy (10%).
        squared_deviations = [dev * dev for dev in (-10., 0., 10.)]
        fold_total = 3.
        expected_std = sqrt(sum(squared_deviations) / fold_total)

        self.assertEqual(matrix.stdNfold(normalizedAccuracies=True),
                         expected_std)

        # Without normalization the value must differ, otherwise this test
        # would not be exercising the normalized code path at all.
        self.assertNotEqual(matrix.stdNfold(normalizedAccuracies=False),
                            expected_std)
Ejemplo n.º 12
0
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License     
# version 3 along with this program. If not, see http://www.gnu.org/licenses/


from __future__ import print_function
import sys
from gaia2.classification import ConfusionMatrix


# Render the confusion matrix stored in a results file as an HTML page.
# Usage: <script> <results_file> <confusion_matrix_html_file>
try:
    results = sys.argv[1]
    output_html = sys.argv[2]
except IndexError:
    # Only a missing argument is expected here; a bare except would also
    # swallow unrelated errors such as KeyboardInterrupt.
    print('Usage: %s <results_file> <confusion_matrix_html_file>' % sys.argv[0])
    # sys.exit is always available, whereas the bare exit() helper is only
    # installed by the site module.
    sys.exit(1)

cm = ConfusionMatrix()
cm.load(results)
# Close the output explicitly so the HTML is flushed to disk deterministically.
with open(output_html, 'w') as html_file:
    html_file.write(cm.toHtml())


Ejemplo n.º 13
0
# Gaia is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation (FSF), either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
# details.
#
# You should have received a copy of the Affero GNU General Public License     
# version 3 along with this program. If not, see http://www.gnu.org/licenses/


import sys
from gaia2.classification import ConfusionMatrix


# Render the confusion matrix stored in a results file as an HTML page.
# Usage: <script> <results_file> <confusion_matrix_html_file>
try:
    results = sys.argv[1]
    output_html = sys.argv[2]
except IndexError:
    # Only a missing argument is expected here; a bare except would also
    # swallow unrelated errors such as KeyboardInterrupt.
    # A single parenthesised argument prints identically whether print is
    # the Python 2 statement or the Python 3 function.
    print('Usage: %s <results_file> <confusion_matrix_html_file>' % sys.argv[0])
    # sys.exit is always available, whereas the bare exit() helper is only
    # installed by the site module.
    sys.exit(1)

cm = ConfusionMatrix()
cm.load(results)
# Close the output explicitly so the HTML is flushed to disk deterministically.
with open(output_html, 'w') as html_file:
    html_file.write(cm.toHtml())