Esempio n. 1
0
    if len(sys.argv) != 4:
        print >> sys.stderr, "Usage: classify.py [TAB_FILE] [classifier: tree, bayes, svm, logreg] [number to filter, 0 -> no filtering]"
        sys.exit(1)

    data = proj_utils.load_data(sys.argv[1])
    type = sys.argv[2]
    features = int(sys.argv[3])

    train_data, test_data = proj_utils.partition_data(data)

    model = train_classifier(train_data, type, features)
    train_CA, train_results = proj_utils.test_classifier(model, train_data)
    test_CA, test_results = proj_utils.test_classifier(model, test_data)

    #print "Train Accuracy: %f, Test Accuracy: %f" % (train_CA, test_CA)
    train_stats = proj_utils.get_stats(train_results)
    test_stats = proj_utils.get_stats(test_results)

    print "Train:\n%s" % str(train_stats)
    print "\nTest:\n%s" % str(test_stats)

    if features != 0:
        print "Features selected:\n%s" % str(model.domain)
        filename = '%s%sresults%s%s_filtered_%s.txt' % (
            os.path.dirname(__file__), os.sep, os.sep, type, str(features)
        )  #+ type + '_filtered_' + str(features) + '.txt'
    else:
        filename = '%s%sresults%s%s.txt' % (os.path.dirname(__file__), os.sep,
                                            os.sep, type)
    print filename
Esempio n. 2
0
        sys.exit(1)

    proportions = 0.1
    if len(sys.argv) >= 3:
        proportions = float(sys.argv[2])

    data = proj_utils.load_data(sys.argv[1])

    train_data, test_data = proj_utils.partition_data(data)

    print "\"Proportion\"",
    proj_utils.print_csv_header()
    #for prop in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0):
    for prop in (1.0, 1.5, 2.0, 2.5, 3.0, 3.5):
        resampled_train_data = proj_utils.undersample_class(
            train_data, 'nonad', prop)

        model = train_classifier(resampled_train_data)
        train_CA, train_results = proj_utils.test_classifier(
            model, resampled_train_data)
        test_CA, test_results = proj_utils.test_classifier(model, test_data)

        #print "Train Accuracy: %f, Test Accuracy: %f" % (train_CA, test_CA)
        #train_stats = proj_utils.get_stats(train_results)
        test_stats = proj_utils.get_stats(test_results)

        #print "Train:\n%s" % str(train_stats)
        #print "\nTest:\n%s\n" % str(test_stats)
        print "%f, " % prop,
        proj_utils.print_results_csv(test_stats)
Esempio n. 3
0
    if len(sys.argv) != 4:
        print >> sys.stderr, "Usage: classify.py [TAB_FILE] [classifier: tree, bayes, svm, logreg] [number to filter, 0 -> no filtering]"
        sys.exit(1)

    data = proj_utils.load_data(sys.argv[1])
    type = sys.argv[2]
    features = int(sys.argv[3])

    train_data, test_data = proj_utils.partition_data(data)

    model = train_classifier(train_data, type, features)
    train_CA, train_results = proj_utils.test_classifier(model, train_data)
    test_CA, test_results = proj_utils.test_classifier(model, test_data)

    #print "Train Accuracy: %f, Test Accuracy: %f" % (train_CA, test_CA)
    train_stats = proj_utils.get_stats(train_results)
    test_stats = proj_utils.get_stats(test_results)

    print "Train:\n%s" % str(train_stats)
    print "\nTest:\n%s" % str(test_stats)
	
    if features != 0:
		print "Features selected:\n%s" % str(model.domain)
		filename = '%s%sresults%s%s_filtered_%s.txt' % (os.path.dirname(__file__), os.sep, os.sep, type, str(features)) #+ type + '_filtered_' + str(features) + '.txt'
    else:
		filename = '%s%sresults%s%s.txt' % (os.path.dirname(__file__), os.sep, os.sep, type)
    print filename
		
    f = open(filename, 'w+')
    f.write("Train:\n")
    f.write(str(train_stats) + "\n")
Esempio n. 4
0
        sys.exit(1)

    proportions = 0.1
    if len(sys.argv) >= 3:
        proportions = float(sys.argv[2])

    data = proj_utils.load_data(sys.argv[1])

    train_data, test_data = proj_utils.partition_data(data)

    print "\"Proportion\"",
    proj_utils.print_csv_header()
    #for prop in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0):
    for prop in (1.0,1.5,2.0,2.5,3.0,3.5):
        resampled_train_data = proj_utils.undersample_class(train_data, 'nonad', prop)

        model = train_classifier(resampled_train_data)
        train_CA, train_results = proj_utils.test_classifier(model, resampled_train_data)
        test_CA, test_results = proj_utils.test_classifier(model, test_data)

        #print "Train Accuracy: %f, Test Accuracy: %f" % (train_CA, test_CA)
        #train_stats = proj_utils.get_stats(train_results)
        test_stats = proj_utils.get_stats(test_results)

        #print "Train:\n%s" % str(train_stats)
        #print "\nTest:\n%s" % str(test_stats)
        print "%f, " % prop,
        proj_utils.print_results_csv(test_stats)