Esempi in Python per create_model, esempi in Python per copyright_library.create_model

Esempio n. 1

0

Mostra file

        return(setup_database(options.drop))

    if not options.model:
        print >> sys.stdout, 'You must specify a model file for all phases of the algorithm.\n\n'
        optparser.print_usage()
        sys.exit(1)

    model = {}
    if options.training:
        files = [line.rstrip() for line in open(options.training).readlines()]
        if os.path.exists('training.dat'):
            hash = pickle.load(open('training.dat','r'))
            if hash == files and os.path.exists(options.model):
                return 0
        training_data = [open(file).read() for file in files if os.path.exists(file)]
        model = library.create_model(training_data)
        pickle.dump(model, open(options.model,'w'))
        pickle.dump(files, open('training.dat', 'w'))

    try:
        model = pickle.load(open(options.model))
    except:
        print >> sys.stdout, 'You must specify a training file to create a model.\n\n'
        optparser.print_usage()
        sys.exit(1)

    if options.version:
        print "Source hash: %s" % hex(abs(hash(open(sys.argv[0]).read())))
        print 'Model hash: %s' % (model['id'])
    
    if options.analyze_from_file:

Esempio n. 2

0

Mostra file

File: tests.py Progetto: Triangled/fossology

def test2():
    """
    Check that the naive Bayes classifier labels data it has not seen before.

    This test will fail if our algorithm for smoothing tokens that we have
    not seen yet is broken. Since 'C' is the trigger for the start of a
    statement the probability of 'C unknown' should be very high.

    A model is built from ``trainingdata`` with ``library.create_model()``.
    Each row of ``testdata`` is labelled with ``library.label_nb()`` using
    the model's 'P(F|C)' entry and compared against the row of
    ``correct_output`` at the same index. Mismatches and exceptions are
    appended to ``log``; rows that match are counted in ``passed``.

    Returns a tuple giving the number of tests passed, the number of
    tests failed and a string holding all log messages, i.e.
        (3, 4, 'Failed Test[1]: Divide by 0.').

    NOTE(review): the return statement is not part of the code visible in
    this excerpt -- confirm against the full tests.py.
    """

    test = 2
    trainingdata = [
        '''A B <s>C D E F</s> G H I J''',
        '''G H <s>C E F D</s> A B I J''',
        '''I J <s>C F D E</s> G H A B''',
        '''a b <s>C D E f</s> I J G H''',
    ]
    testdata = [
        ['r', 'x', 'x', 'x', 'E', 'F', 'G', 'H', 'I', 'J'],
        ['r', 'x', 'x', 'y', 'F', 'D', 'A', 'B', 'I', 'J'],
        ['I', 'J', 'C', 'D', 'z', 'E', 'G', 'H', 'A', 'B'],
        ['a', 'b', 'C', 'D', 'E', 'w', 'I', 'J', 'G', 'H'],
    ]
    correct_output = [
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
    ]
    model = library.create_model(trainingdata)

    n = len(testdata)
    passed = 0
    log = "Test%d started at %s.\n" % (test, time.ctime())
    for i, tokens in enumerate(testdata):
        try:
            out = library.label_nb(model['P(F|C)'], tokens)
            expected = correct_output[i]
            if len(out) != len(expected):
                # Must format with `test`: an undefined name here would raise
                # NameError and be logged by the handler below as an
                # exception instead of as a length mismatch.
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was the incorrect length.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
            if out != expected:
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was incorrect.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
        except Exception:
            # An error while labelling one row counts as a failure for that
            # row only; the remaining rows are still exercised.
            log += "Test%d [%d] Failed.\n" % (test, i)
            log += "\tRecieved the following exception:\n"
            # Tab-join the formatted traceback lines so they appear indented
            # inside the log.
            p = '\t'.join(traceback.format_exception(*sys.exc_info()))
            log += "\t%s\n" % p
            continue
        passed += 1

Esempio n. 3

0

Mostra file

def test2():
    """
    Check that the naive Bayes classifier labels data it has not seen before.

    This test will fail if our algorithm for smoothing tokens that we have
    not seen yet is broken. Since 'C' is the trigger for the start of a
    statement the probability of 'C unknown' should be very high.

    A model is built from ``trainingdata`` with ``library.create_model()``.
    Each row of ``testdata`` is labelled with ``library.label_nb()`` using
    the model's 'P(F|C)' entry and compared against the row of
    ``correct_output`` at the same index. Mismatches and exceptions are
    appended to ``log``; rows that match are counted in ``passed``.

    Returns a tuple giving the number of tests passed, the number of
    tests failed and a string holding all log messages, i.e.
        (3, 4, 'Failed Test[1]: Divide by 0.').

    NOTE(review): the return statement is not part of the code visible in
    this excerpt -- confirm against the full source file.
    """

    test = 2
    trainingdata = [
        '''A B <s>C D E F</s> G H I J''',
        '''G H <s>C E F D</s> A B I J''',
        '''I J <s>C F D E</s> G H A B''',
        '''a b <s>C D E f</s> I J G H''',
    ]
    testdata = [
        ['r', 'x', 'x', 'x', 'E', 'F', 'G', 'H', 'I', 'J'],
        ['r', 'x', 'x', 'y', 'F', 'D', 'A', 'B', 'I', 'J'],
        ['I', 'J', 'C', 'D', 'z', 'E', 'G', 'H', 'A', 'B'],
        ['a', 'b', 'C', 'D', 'E', 'w', 'I', 'J', 'G', 'H'],
    ]
    correct_output = [
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
    ]
    model = library.create_model(trainingdata)

    n = len(testdata)
    passed = 0
    log = "Test%d started at %s.\n" % (test, time.ctime())
    for i, tokens in enumerate(testdata):
        try:
            out = library.label_nb(model['P(F|C)'], tokens)
            expected = correct_output[i]
            if len(out) != len(expected):
                # Must format with `test`: an undefined name here would raise
                # NameError and be logged by the handler below as an
                # exception instead of as a length mismatch.
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was the incorrect length.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
            if out != expected:
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was incorrect.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
        except Exception:
            # An error while labelling one row counts as a failure for that
            # row only; the remaining rows are still exercised.
            log += "Test%d [%d] Failed.\n" % (test, i)
            log += "\tRecieved the following exception:\n"
            # Tab-join the formatted traceback lines so they appear indented
            # inside the log.
            p = '\t'.join(traceback.format_exception(*sys.exc_info()))
            log += "\t%s\n" % p
            continue
        passed += 1

Esempio n. 4

0

Mostra file

File: tests.py Progetto: Triangled/fossology

def test1():
    """
    Check that the naive Bayes classifier labels data it has already seen.

    Please note that naive Bayes is a linear classifier so there may be data
    that it will not be able to correctly classify even though it has been
    trained on the same data.

    A model is built from ``trainingdata`` with ``library.create_model()``.
    Each row of ``testdata`` is labelled with ``library.label_nb()`` using
    the model's 'P(F|C)' entry and compared against the row of
    ``correct_output`` at the same index. Mismatches and exceptions are
    appended to ``log``; rows that match are counted in ``passed``.

    Returns a tuple giving the number of tests passed, the number of
    tests failed and a string holding all log messages, i.e.
        (3, 4, 'Failed Test [1]: Divide by 0.').

    NOTE(review): the return statement is not part of the code visible in
    this excerpt -- confirm against the full tests.py.
    """

    test = 1
    trainingdata = [
        '''A B <s>C D E F</s> G H I J''',
        '''G H C E F D A B I J''',
        '''I J C F D E G H A B''',
        '''a b <s>C D E f</s> I J G H''',
    ]
    testdata = [
        ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
        ['G', 'H', 'C', 'E', 'F', 'D', 'A', 'B', 'I', 'J'],
        ['I', 'J', 'C', 'F', 'D', 'E', 'G', 'H', 'A', 'B'],
        ['a', 'b', 'C', 'D', 'E', 'f', 'I', 'J', 'G', 'H'],
    ]
    correct_output = [
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
    ]
    model = library.create_model(trainingdata)

    n = len(testdata)
    passed = 0
    log = "Test%d started at %s.\n" % (test, time.ctime())
    for i, tokens in enumerate(testdata):
        try:
            out = library.label_nb(model['P(F|C)'], tokens)
            expected = correct_output[i]
            if len(out) != len(expected):
                # Must format with `test`: an undefined name here would raise
                # NameError and be logged by the handler below as an
                # exception instead of as a length mismatch.
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was the incorrect length.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
            if out != expected:
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was incorrect.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
        except Exception:
            # An error while labelling one row counts as a failure for that
            # row only; the remaining rows are still exercised.
            log += "Test%d [%d] Failed.\n" % (test, i)
            log += "\tRecieved the following exception:\n"
            # Tab-join the formatted traceback lines so they appear indented
            # inside the log.
            p = '\t'.join(traceback.format_exception(*sys.exc_info()))
            log += "\t%s\n" % p
            continue
        passed += 1

Esempio n. 5

0

Mostra file

def test1():
    """
    Check that the naive Bayes classifier labels data it has already seen.

    Please note that naive Bayes is a linear classifier so there may be data
    that it will not be able to correctly classify even though it has been
    trained on the same data.

    A model is built from ``trainingdata`` with ``library.create_model()``.
    Each row of ``testdata`` is labelled with ``library.label_nb()`` using
    the model's 'P(F|C)' entry and compared against the row of
    ``correct_output`` at the same index. Mismatches and exceptions are
    appended to ``log``; rows that match are counted in ``passed``.

    Returns a tuple giving the number of tests passed, the number of
    tests failed and a string holding all log messages, i.e.
        (3, 4, 'Failed Test [1]: Divide by 0.').

    NOTE(review): the return statement is not part of the code visible in
    this excerpt -- confirm against the full source file.
    """

    test = 1
    trainingdata = [
        '''A B <s>C D E F</s> G H I J''',
        '''G H C E F D A B I J''',
        '''I J C F D E G H A B''',
        '''a b <s>C D E f</s> I J G H''',
    ]
    testdata = [
        ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
        ['G', 'H', 'C', 'E', 'F', 'D', 'A', 'B', 'I', 'J'],
        ['I', 'J', 'C', 'F', 'D', 'E', 'G', 'H', 'A', 'B'],
        ['a', 'b', 'C', 'D', 'E', 'f', 'I', 'J', 'G', 'H'],
    ]
    correct_output = [
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'],
        ['O', 'O', 'B', 'I', 'I', 'I', 'O', 'O', 'O', 'O'],
    ]
    model = library.create_model(trainingdata)

    n = len(testdata)
    passed = 0
    log = "Test%d started at %s.\n" % (test, time.ctime())
    for i, tokens in enumerate(testdata):
        try:
            out = library.label_nb(model['P(F|C)'], tokens)
            expected = correct_output[i]
            if len(out) != len(expected):
                # Must format with `test`: an undefined name here would raise
                # NameError and be logged by the handler below as an
                # exception instead of as a length mismatch.
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was the incorrect length.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
            if out != expected:
                log += "Test%d [%d] Failed.\n" % (test, i)
                log += "\tOutput from library.label_nb() was incorrect.\n"
                log += "\tGot '%s' instead of '%s'.\n" % (
                    str(out), str(expected))
                continue
        except Exception:
            # An error while labelling one row counts as a failure for that
            # row only; the remaining rows are still exercised.
            log += "Test%d [%d] Failed.\n" % (test, i)
            log += "\tRecieved the following exception:\n"
            # Tab-join the formatted traceback lines so they appear indented
            # inside the log.
            p = '\t'.join(traceback.format_exception(*sys.exc_info()))
            log += "\t%s\n" % p
            continue
        passed += 1