예제 #1
0
def main(argv=sys.argv):
    """Configure the application from a config file and analyze the CSV data.

    Expected command line: ``<script> <config_uri> [parser_a,parser_b,...]``.

    The config file named by ``argv[1]`` is used to set up logging and to
    build the SQLAlchemy engine that ``DBSession`` is bound to.  The CSV files
    are read from the ``csv`` directory next to this script and handed to
    ``analyze`` together with a dict that maps every requested parser name
    to ``True``.

    :param argv: full argument vector, program name first.  The second
        positional argument (comma-separated parser names) is optional; when
        it is omitted an empty dict is passed to ``analyze``.
    """
    if len(argv) not in (2, 3):
        # NOTE(review): usage() is presumably expected to terminate the
        # process; it is defined outside this block -- confirm.
        usage(argv)
    config_uri = argv[1]

    # The arity check above accepts a call with only the config file, so the
    # parser list must be treated as optional.  Indexing argv[2]
    # unconditionally raised IndexError for that accepted form.
    parser_names = argv[2].split(',') if len(argv) > 2 else []
    conf_parser = dict.fromkeys(parser_names, True)

    setup_logging(config_uri)
    settings = get_appsettings(config_uri)
    engine = engine_from_config(settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)

    # The source data is located relative to this script, not to the
    # current working directory.
    script_dir = os.path.dirname(__file__)
    rel_path_source_data = "./csv/"
    source_data_dir = os.path.join(script_dir, rel_path_source_data)

    csv_data = parse_data(source_data_dir)
    analyze(csv_data, conf_parser)
예제 #2
0
파일: control.py 프로젝트: agamrat/ml-598
if len(sys.argv) < 2 or sys.argv[1] == "--help":
    print "Usage: control.py controlfile"
    print "Usage: control.py -lda trainingfile testfile"
    print " Control files look like: "
    print "    First line: trainingfilename, testfilename"
    print "    Any number of subsequent lines: epsilon,stepsize,iteration limit, restarts"
    print "    Training and test files are CSV's with the output variable last"
    print "    Results are stored in testfilename_resultX where X is the line number from the control CSV"
    sys.exit(0)

#lda control here
if sys.argv[1] == "-lda" or sys.argv[1] == "-bayes":
    if len(sys.argv) != 4:
        print "Usage: control.py (-lda|-bayes) trainingfile testfile"
        sys.exit(0)
    (x,y)= csv_parser.parse_data(sys.argv[2])
    (testX, testY) = csv_parser.parse_data(sys.argv[3])
    if sys.argv[1] == "-lda":
        runLDA(x,y, testX, testY, sys.argv[3])
    else:
        runBayes(x,y, testX, testY, sys.argv[3])
    sys.exit(0)   

# Default mode (logistic regression): the first argument names a control
# file, from which the run parameters and the training/test file names are
# read before both data sets are loaded.
controls, training, test = csv_parser.parse_control(sys.argv[1])

x, y = csv_parser.parse_data(training)
testX, testY = csv_parser.parse_data(test)

for i in xrange(len(controls)):
    params = controls[i]
예제 #3
0
파일: kfolds.py 프로젝트: agamrat/ml-598
		if(y[i] == 0):
			train_x.append(x[i])
			train_y.append(y[i])
			del x[i]
			del y[i]
			num_positives = num_positives - 1
	
	print "size of x: " + str(len(train_x)) + " size of y: " + str(len(train_y)) + "size of testset: " + str(len(testset[0]))

	#start off by doing feature selection
	#(results,  temp_x_onlyspon, temp_x_congress, temp_x_nopers) = testfeaturesubsets(k, train_x, train_y, testset[0], testset[1])
	(results) = testfeaturesubsets(k, train_x, train_y, testset[0], testset[1])
	for set_name, algo_data in results.iteritems():
		print (set_name + "\n LR train" + "\t" + str(algo_data[0]) + "\t accuracy: " + str(accuracycalc(algo_data[0]))  + "\t accuracy: " + str(fcalc(algo_data[0]))
			+ "\n LR valid" + "\t" + str(algo_data[1]) + "\t A: " + str(accuracycalc(algo_data[1]))  + "\t F: " + str(fcalc(algo_data[1]))
			+ "\n LR isolated" + "\t" + str(algo_data[2]) + "\t Acuracy: " + str(accuracycalc(algo_data[2]))  + "\t F: " + str(fcalc(algo_data[2]))
			+ "\n LDA train" + "\t" + str(algo_data[3]) + "\t A: " + str(accuracycalc(algo_data[3]))  + "\t F: " + str(fcalc(algo_data[3]))
			+ "\n LDA valid" + "\t" + str(algo_data[4]) + "\t A: " + str(accuracycalc(algo_data[4]))  + "\t F: " + str(fcalc(algo_data[4]))
			+ "\n LDA isolated" + "\t" + str(algo_data[5]) + "\t A: " + str(accuracycalc(algo_data[5]))  + "\t F: " + str(fcalc(algo_data[5]))
			+ "\n NB train" + "\t" + str(algo_data[6]) + "\t A: " + str(accuracycalc(algo_data[6]))  + "\t F: " + str(fcalc(algo_data[6]))
			+ "\n NB valid" + "\t" + str(algo_data[7]) + "\t A: " + str(accuracycalc(algo_data[7]))  + "\t F: " + str(fcalc(algo_data[7]))
			+ "\n NB isolated" + "\t" + str(algo_data[8]) + "\t A: " + str(accuracycalc(algo_data[8]))  + "\t F: " + str(fcalc(algo_data[8]))
			+ "\n")


		
print "parsing..."
(x,y) = csv_parser.parse_data("final_data.csv")
print "parsed"
kfolds_control(4, x, y)
예제 #4
0
    #(results,  temp_x_onlyspon, temp_x_congress, temp_x_nopers) = testfeaturesubsets(k, train_x, train_y, testset[0], testset[1])
    (results) = testfeaturesubsets(k, train_x, train_y, testset[0], testset[1])
    for set_name, algo_data in results.iteritems():
        print(set_name + "\n LR train" + "\t" + str(algo_data[0]) +
              "\t accuracy: " + str(accuracycalc(algo_data[0])) +
              "\t accuracy: " + str(fcalc(algo_data[0])) + "\n LR valid" +
              "\t" + str(algo_data[1]) + "\t A: " +
              str(accuracycalc(algo_data[1])) + "\t F: " +
              str(fcalc(algo_data[1])) + "\n LR isolated" + "\t" +
              str(algo_data[2]) + "\t Acuracy: " +
              str(accuracycalc(algo_data[2])) + "\t F: " +
              str(fcalc(algo_data[2])) + "\n LDA train" + "\t" +
              str(algo_data[3]) + "\t A: " + str(accuracycalc(algo_data[3])) +
              "\t F: " + str(fcalc(algo_data[3])) + "\n LDA valid" + "\t" +
              str(algo_data[4]) + "\t A: " + str(accuracycalc(algo_data[4])) +
              "\t F: " + str(fcalc(algo_data[4])) + "\n LDA isolated" + "\t" +
              str(algo_data[5]) + "\t A: " + str(accuracycalc(algo_data[5])) +
              "\t F: " + str(fcalc(algo_data[5])) + "\n NB train" + "\t" +
              str(algo_data[6]) + "\t A: " + str(accuracycalc(algo_data[6])) +
              "\t F: " + str(fcalc(algo_data[6])) + "\n NB valid" + "\t" +
              str(algo_data[7]) + "\t A: " + str(accuracycalc(algo_data[7])) +
              "\t F: " + str(fcalc(algo_data[7])) + "\n NB isolated" + "\t" +
              str(algo_data[8]) + "\t A: " + str(accuracycalc(algo_data[8])) +
              "\t F: " + str(fcalc(algo_data[8])) + "\n")


print "parsing..."
(x, y) = csv_parser.parse_data("final_data.csv")
print "parsed"
kfolds_control(4, x, y)