# Assumed module-level context for these examples (inferred from usage, not part
# of the excerpted source): the libsvm Python bindings (apparently a locally
# modified copy, since svm_cross_validation takes a fold_start array here),
# numpy, matplotlib's PdfPages, multiprocessing, ctypes, and project helpers
# (mygrid, load_data, Timer, the read_*/save_* routines) that are not shown.
from ctypes import c_int, c_double
from multiprocessing import Pool
from numpy import array, zeros, size, unique, dot, mean, sqrt, nan
from matplotlib.backends.backend_pdf import PdfPages
from svmutil import svm_train, svm_predict, svm_save_model, svm_load_model
import svm
from svm import libsvm
import mygrid

def main(args):
	paramsfn = args[0]
	exec(open(paramsfn,'r').read())
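	# The parameters file is plain Python; exec() pulls its settings
	# (testdatafilename, modelfilename, doscale, posclass, optbias, outputlog,
	# outputpredictions, ...) into the local scope (Python 2 behaviour).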
	
	pdfpages = PdfPages('%s.pdf' % (outputlog))
		
	output_test,input_test = load_data(testdatafilename)
		
	if doscale:
		maxinput,mininput = read_scale_data(scaledatafilename)
		input_test = (input_test-mininput)/(maxinput-mininput)
						
	if choose_specific_features:
		for specific_selected_choice in specific_selected_features:
			inputfiltered_test = input_test[:,specific_selected_choice]

			m = svm_load_model(modelfilename)
			if posclass == 'auto':
				posclass = m.get_labels()[0]
				

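			# svm_predict returns libsvm's predicted labels plus raw decision
			# values; evaluations_classify then re-thresholds those values at
			# optbias (set in the parameters file) to get the bias-optimized
			# accuracy and the confusion matrix printed below.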
			pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(output_test,[list(x) for x in inputfiltered_test],m)
			ACC,confusionmatrix = mygrid.evaluations_classify(output_test, [x[0] for x in pred_values],posclass,optbias)
			db = array([[output_test[i],pred_values[i][0]] for i in range(len(output_test))])
			
			neg = len([x for x in output_test if x != posclass])
			pos = len(output_test)-neg
			auc = float('nan')  # AUC is undefined if only one class is present
			if neg != 0 and pos != 0:
				auc,topacc,optbias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,[],True,pdfpages,'Test ROC curve')

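			# Confusion-matrix table: rows are the true classes (positive class
			# first), columns are the predicted classes; the right-hand column
			# and the bottom row hold the per-class and per-prediction totals.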
			print 'Test optimized accuracy = %g' % (ACC)
			print '================================'
			print '||   ||%6d |%6d |       ||' % (m.get_labels()[0],m.get_labels()[1])
			print '================================'
			print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
			print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
			print '||----------------------------||'
			print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
			print '================================'

			if outputpredictions:
				fout = open(predictionslog,'w')
				for label in pred_labels:
					print >> fout, label
				fout.close()
				
			if outputlog != '':
				fout = open(outputlog,'a')
				print >> fout, '========================='
				print >> fout, specific_selected_choice
				print >> fout, ACC,auc
				fout.close()
	else:
		

		m = svm_load_model(modelfilename)
		if posclass == 'auto':
			posclass = m.get_labels()[0]

		pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(output_test,[list(x) for x in input_test],m)
		ACC,confusionmatrix = mygrid.evaluations_classify(output_test, [x[0] for x in pred_values],posclass,optbias)
		db = array([[output_test[i],pred_values[i][0]] for i in range(len(output_test))])
		neg = len([x for x in output_test if x != posclass])
		pos = len(output_test)-neg
		if neg != 0 and pos != 0:
			auc,topacc,optbias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,[],True,pdfpages,'Test ROC curve')
		
		print 'Test optimized accuracy = %g' % (ACC)
		print '================================'
		print '||   ||%6d |%6d |       ||' % (m.get_labels()[0],m.get_labels()[1])
		print '================================'
		print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
		print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
		print '||----------------------------||'
		print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
		print '================================'

		
		
		if outputpredictions:
			fout = open(testdatafilename + '_predictions.dat','w')
			for label in pred_labels:
				print >> fout, label
			fout.close()
			
		if outputlog != '':
			fout = open(outputlog,'a')
			print >> fout, '========================='
			print >> fout, ACC
			fout.close()
	
	pdfpages.close()
def main(args):
	paramsfn = args[0]
	exec(open(paramsfn,'r').read())
	
	
	if len(args) > 2:
		gammarange = [float(args[1])]
		crange = [float(args[2])]
	
	output,input,fieldnames,fold_inds = load_data(datafilename,use_specific_fold_inds)
	sep_validation = False
	if separate_validation_set != '':
		output_valid,input_valid,fieldnames,fold_inds_valid = load_data(separate_validation_set,use_specific_fold_inds)
		sep_validation = True


	fold_start = [-1]
	if sep_validation:
		fold_start_valid = [-1]
	
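	# With user-supplied fold indices, the rows are regrouped so each fold is a
	# contiguous block; fold_start records the row offset where each fold starts
	# and nf becomes the number of folds.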
	if use_specific_fold_inds:
		unique_fold_ids = unique(fold_inds)
		row_inds = []
		outputcopy = []
		inputcopy = zeros([size(input,0),size(input,1)],dtype='float64')
		fold_start = [0]
		
		curind = 0
		for ind in unique_fold_ids:
			row_inds = [i for i in xrange(len(fold_inds)) if fold_inds[i] == ind]
			inputcopy[curind:curind+len(row_inds),:] = input[row_inds,:]
			outputcopy.extend([output[i] for i in row_inds])
			curind += len(row_inds)
			
			fold_start.append(fold_start[-1]+len(row_inds))
		input = inputcopy
		output = outputcopy
		nf = len(fold_start)-1
		
		if sep_validation:
			unique_fold_ids_valid = unique(fold_inds_valid)
			row_inds = []
			outputcopy = []
			inputcopy = zeros([size(input_valid,0),size(input_valid,1)],dtype='float64')
			fold_start_valid = [0]
			
			curind = 0
			for ind in unique_fold_ids_valid:
				row_inds = [i for i in xrange(len(fold_inds_valid)) if fold_inds_valid[i] == ind]
				inputcopy[curind:curind+len(row_inds),:] = input_valid[row_inds,:]
				outputcopy.extend([output_valid[i] for i in row_inds])
				curind += len(row_inds)
				
				fold_start_valid.append(fold_start_valid[-1]+len(row_inds))
			input_valid = inputcopy
			output_valid = outputcopy
			nf = len(fold_start_valid)-1
		
	if binarizeoutput:
		output,boundary = binarize_output(output,binary_threshold,binary_boundary_type)
	
	
	if testdatafilename != '':
		output_test,input_test,fieldnames,fold_inds_test = load_data(testdatafilename,False)
		if binarizeoutput:
			output_test = [1 if x > boundary else -1 for x in output_test]
	
	
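	# Min-max scale the inputs to [0, 1] using training-set extrema; the same
	# extrema are applied to the test and validation sets and, when the model is
	# saved, written out for reuse at prediction time.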
	if doscale:
		maxinput = input.max(0)
		mininput = input.min(0)
		input = (input-mininput)/(maxinput-mininput)
		
		if testdatafilename != '':
			input_test = (input_test-mininput)/(maxinput-mininput)

		if savemodel:
			save_scale_data(datafilename+'_scales.dat',maxinput,mininput)
			
		if sep_validation:
			input_valid = (input_valid-mininput)/(maxinput-mininput)



	if donormalize:
		means = input.mean(0)
		stds = sqrt(input.var(0))
		input = (input-means)/stds
		if testdatafilename != '':
			input_test = (input_test-means)/stds

		if savemodel:
			save_zscore_data(datafilename+'_meansstdevs.dat',means,stds)
	
		if sep_validation:
			input_valid = (input_valid-means)/stds
		
	if numcpus == 'auto':
		p = Pool()
	else:
		p = Pool(numcpus)
	
	
	if choose_specific_features:
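		# When requested, turn the selected-feature list into nested prefixes
		# (first 2, first 4, ...) so that growing feature subsets are evaluated.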
		if choose_specific_features_increasing:
			specific_selected_features = [specific_selected_features[:i] for i in xrange(2,len(specific_selected_features),2)]
			
		for specific_selected_choice in specific_selected_features:
			inputfiltered = input[:,specific_selected_choice]
			if sep_validation:
				inputfiltered_valid = input_valid[:,specific_selected_choice]

			if dopca:
				coeff,temp,latent = princomp(inputfiltered)

				if savemodel:
					save_pca_coeffs(datafilename+'_pcacoeffs.dat',coeff,mean(inputfiltered.T,axis=1))
				inputfiltered = temp
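				# PCA together with a separate validation set is not handled; bail out.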
				if sep_validation:
					return
							
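			# Cross-validated grid search over (C, gamma); results packs the
			# optimized CV scores followed by the selected bias, C and gamma
			# (results[-3], results[-2], results[-1]).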
			with Timer():
			
				if sep_validation:
					if use_specific_fold_inds:
						results = mygrid.grid_classify_sepvalid (crange,gammarange,output,[list(x) for x in inputfiltered],output_valid,[list(x) for x in inputfiltered_valid],nf,useprob,timeout,p,fold_start,fold_start_valid)
					else:
						results = mygrid.grid_classify_sepvalid (crange,gammarange,output,[list(x) for x in inputfiltered],output_valid,[list(x) for x in inputfiltered_valid],nf,useprob,timeout,p)
				else:
					if use_specific_fold_inds:
						results = mygrid.grid_classify (crange,gammarange,output,[list(x) for x in inputfiltered],nf,useprob,timeout,p,fold_start)
					else:
						results = mygrid.grid_classify (crange,gammarange,output,[list(x) for x in inputfiltered],nf,useprob,timeout,p)

				
			param = svm.svm_parameter('-c %g -g %g -b %d' % (results[-2],results[-1],int(useprob)))
			
			prob = svm.svm_problem(output, [list(x) for x in inputfiltered])
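			# The fold boundaries are passed to the cross-validation routines as a
			# C int array (the libsvm bindings here appear to be locally modified
			# to accept them).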
			fold_start_p = (c_int *len(fold_start))()
			for i in xrange(len(fold_start)):
				fold_start_p[i] = fold_start[i]
			if posclass == 'auto':
				posclass = output[0]

			if sep_validation:
				prob_valid = svm.svm_problem(output_valid, [list(x) for x in inputfiltered_valid])
				testlength = prob_valid.l
				fold_start_p_valid = (c_int *len(fold_start_valid))()
				for i in xrange(len(fold_start_valid)):
					fold_start_p_valid[i] = fold_start_valid[i]
			else:
				testlength = prob.l	
								
			target = (c_double * testlength)()
										
			#[maxauc,maxoptacc,maxphi,minfpfnration,maxf1,optbias,optc,optgamma]
			
			if sep_validation:
				libsvm.svm_cross_validation_sepsets(prob, prob_valid,fold_start_p, fold_start_p_valid,param, nf, target)
			else:
				libsvm.svm_cross_validation(prob, fold_start_p, param, nf, target)
				
				
			if sep_validation:
				ys = prob_valid.y[:testlength]
			else:
				ys = prob.y[:prob.l]
				
			db = array([[ys[i],target[i]] for i in range(testlength)])
				
			
			neg = len([x for x in ys if x != posclass])
			pos = testlength-neg

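			# Note: the `or True` below makes this branch unconditional, so the
			# else at the bottom (calc_AUC without plotting) is effectively dead.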
			if len(specific_selected_features) == 1 or True:
				pdfpages = PdfPages('%s_train.pdf' % (outputlog))
#				auc,topacc,optaccbias,topphi,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],True,pdfpages,'Optimal Cross-Validation ROC curve')
				topacc,topphi,minfpfnratio,topf1,auc,optbias = mygrid.optimize_results(db,neg,pos,posclass,'F1')
				print [topacc,results[1]]
				print [topphi,results[2]]
				print [topf1,results[4]]
				print [auc,results[0]]
				pdfpages.close()
#				print target
				if sep_validation:
					ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output_valid,target,posclass,results[-3])
				else:
					ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output,target,posclass,results[-3])
				if posclass == 1:
					negclass = 0
				else:
					negclass = 1

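				# Cohen's kappa from the cross-validation confusion matrix:
				# probchance is the agreement expected by chance from the class
				# frequencies, and kappa rescales the optimized accuracy against it.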
				numpred_pos = confusionmatrix[0,0]+confusionmatrix[1,0]
				numpred_neg = confusionmatrix[0,1]+confusionmatrix[1,1]
				
				N = pos+neg
				probchance = (numpred_pos*pos+numpred_neg*neg)*1.0/(N*N)
				kappa = (topacc-probchance)*1.0/(1-probchance)
				
				print 'Train optimized accuracy = %g' % (topacc)
				print 'Train optimized Phi statistic = %g' % (topphi)
				print 'Train optimized kappa = %g' % (kappa)
				print 'Train optimized F1 score = %f' % (topf1)
				print 'Train optimized TP/RECALL = %g, FP = %g, PRECISION = %g' % (confusionmatrix[0,0]/pos,confusionmatrix[1,0]/neg,confusionmatrix[0,0]/(confusionmatrix[0,0]+confusionmatrix[1,0]))
				print '================================'
				print '||   ||%6d |%6d |       ||' % (posclass,negclass)
				print '================================'
				print '||%3d||%6g |%6g |%6g ||' % (posclass,confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
				print '||%3d||%6g |%6g |%6g ||' % (negclass,confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
				print '||----------------------------||'
				print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
				print '================================'
				
				
			else:
				auc,topacc,optaccbias,topphi,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],False,0,'Optimal Cross-Validation ROC curve')
			
			print 'Optimal gamma = %g\nOptimal c = %g\nOptimal Bias = %g' % (results[-1],results[-2],results[-3])
			print 'Top CV results: AUC = %g, OPTIMIZED ACC = %g, OPTIMIZED PHI = %g' % (auc,topacc,topphi)

			if outputlog != '':
				fout = open(outputlog,'a')
				print >> fout, '========================='
				print >> fout, datafilename
				print >> fout, doscale, donormalize, dopca, '(scale/norm/pca)'
				print >> fout, crange[0],crange[-1], gammarange[0], gammarange[-1], '(cs,gammas)'
				print >> fout, use_specific_fold_inds, nf, '(use specific folds, numfold)'
				print >> fout, 'SPECIFIC FIELDS:'
				print >> fout, specific_selected_choice
				if fieldnames != []:
					for i in specific_selected_choice:
						print >> fout, fieldnames[i],
					print >> fout
				print >> fout, 'train: '
				print >> fout, '    AUC=%g,ACC=%g,kappa=%g,phi=%g,f1=%g (g=%g,c=%g,bias=%g)' % (auc,topacc,kappa,topphi,topf1,results[-1],results[-2],results[-3])
				print >> fout, '    ||%3d||%6g |%6g |%6g ||' % (posclass,confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
				print >> fout, '    ||%3d||%6g |%6g |%6g ||' % (negclass,confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
				fout.close()
			
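			# Cross-validation predictions log: one row per sample with the
			# cross-validated prediction value, the true label, and the input
			# features in index:value form.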
			if outputpredictions:
				fout = open(predictionslog,'w')
				if sep_validation:
					for ind in xrange(len(output_valid)):
						label = output_valid[ind]
						value = target[ind]
						oneinputrow = input_valid[ind,:]
						print >> fout, value, label,
						
						for j in xrange(len(oneinputrow)):
							print >> fout, '%d:%f' % (j+1,oneinputrow[j]),
						print >> fout
				else:
					for ind in xrange(len(output)):
						label = output[ind]
						value = target[ind]
						oneinputrow = input[ind,:]
						print >> fout, value, label,
						
						for j in xrange(len(oneinputrow)):
							print >> fout, '%d:%f' % (j+1,oneinputrow[j]),
						print >> fout
				fout.close()
			
			del target
		
				
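			# Retrain on the full training set with the selected C and gamma
			# before saving the model and/or scoring the held-out test file.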
			if savemodel:
				param = ('-c %g -g %g -b %d' % (results[-2],results[-1],int(useprob)))
				m = svm_train(output,[list(x) for x in inputfiltered],param)
				svm_save_model(datafilename + '.model',m)
				
				
			
			if testdatafilename != '':
				inputfiltered_test = input_test[:,specific_selected_choice]
				if dopca:
					M = (inputfiltered_test-mean(inputfiltered_test.T,axis=1)).T # subtract the mean (along columns)
					inputfiltered_test = dot(coeff.T,M).T # projection of the data in the new space

				param = ('-c %g -g %g -b %d' % (results[-2],results[-1],int(useprob)))
				m = svm_train(output,[list(x) for x in inputfiltered],param)
				pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(output_test,[list(x) for x in inputfiltered_test],m,'-b %d' % (int(useprob)))
				ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output_test, [x[0] for x in pred_values],posclass,results[-3])
				db = array([[output_test[i],pred_values[i][0]] for i in range(len(output_test))])
				neg = len([x for x in output_test if x != posclass])
				pos = len(output_test)-neg

				auctest = 0				
				if neg != 0 and pos != 0:
					auctest,topacctest,optaccbias,topphitest,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],False,pdfpages,'Test ROC curve',results[-3])

				numpred_pos = confusionmatrix[0,0]+confusionmatrix[1,0]
				numpred_neg = confusionmatrix[0,1]+confusionmatrix[1,1]
				
				N = pos+neg
				probchance = (numpred_pos*pos+numpred_neg*neg)*1.0/(N*N)
				testkappa = (ACC/100.0-probchance)*1.0/(1-probchance)

				
				print 'Test optimized accuracy = %g' % (ACC)
				print 'Test optimized Phi statistic = %g' % (PHI)
				print 'Test optimized kappa = %g' % (testkappa)
				print '================================'
				print '||   ||%6d |%6d |       ||' % (m.get_labels()[0],m.get_labels()[1])
				print '================================'
				print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
				print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
				print '||----------------------------||'
				print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
				print '================================'


				if outputlog != '':
					fout = open(outputlog,'a')
	
					print >> fout, 'test: '
					print >> fout, '   ACC=%g,AUC=%g,kappa=%g,phi=%g' % (ACC,auctest,testkappa,PHI)
					print >> fout, '   ||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
					print >> fout, '   ||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
	
					fout.close()
	else:
		
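		# Same pipeline on the full feature set: grid search, cross-validated
		# evaluation with ROC plot, optional model saving and test-set scoring.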
		with Timer():
			if use_specific_fold_inds:
				results = mygrid.grid_classify (crange,gammarange,output,[list(x) for x in input],nf,useprob,timeout,p,fold_start)
			else:
				results = mygrid.grid_classify (crange,gammarange,output,[list(x) for x in input],nf,useprob,timeout,p)

		param = svm.svm_parameter('-c %g -g %g -b %d' % (results[-2],results[-1],int(useprob)))
		prob = svm.svm_problem(output, [list(x) for x in input])
		target = (c_double * prob.l)()
		fold_start_p = (c_int *len(fold_start))()
		for i in xrange(len(fold_start)):
			fold_start_p[i] = fold_start[i]
		
		if posclass == 'auto':
			posclass = output[0]
			
		libsvm.svm_cross_validation(prob, fold_start_p, param, nf, target)
		ys = prob.y[:prob.l]
		db = [[ys[i],target[i]] for i in range(prob.l)]
		db = array(db)
		neg = len([x for x in ys if x != posclass])
		pos = prob.l-neg
		
		pdfpages = PdfPages('%s_train.pdf' % (outputlog))
		auc,topacc,optaccbias,topphi,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],True,pdfpages,'Optimal Cross-Validation ROC curve')
		pdfpages.close()
		ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output, target,posclass,results[-3])
		if posclass == 1:
			negclass = 0
		else:
			negclass = 1
			
		print 'Train optimized accuracy = %g' % (topacc)
		print 'Train optimized Phi statistic = %g' % (topphi)
		print 'TP/RECALL = %g, FP = %g, PRECISION = %g' % (confusionmatrix[0,0]/pos,confusionmatrix[1,0]/neg,confusionmatrix[0,0]/(confusionmatrix[0,0]+confusionmatrix[1,0]))
		print '================================'
		print '||   ||%6d |%6d |       ||' % (posclass,negclass)
		print '================================'
		print '||%3d||%6g |%6g |%6g ||' % (posclass,confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
		print '||%3d||%6g |%6g |%6g ||' % (negclass,confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
		print '||----------------------------||'
		print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
		print '================================'
		
		if outputpredictions:
			fout = open(predictionslog,'w')
			for ind in xrange(len(output)):
				label = output[ind]
				value = target[ind]
				oneinputrow = input[ind,:]
				print >> fout, value, label,
				
				for j in xrange(len(oneinputrow)):
					print >> fout, '%d:%f' % (j+1,oneinputrow[j]),
				print >> fout
			fout.close()
		del target
		
		print 'Optimal gamma = %g\nOptimal c = %g\nOptimal Bias = %g' % (results[-1],results[-2],optphibias)
		print 'Top CV results: AUC = %g, OPTIMIZED ACC = %g, OPTIMIZED PHI = %g' % (auc,topacc,topphi)
		if savemodel:
			param = ('-c %g -g %g -b %d' % (results[-2],results[-1],int(useprob)))
			m = svm_train(output,[list(x) for x in input],param)
			svm_save_model(datafilename+'.model',m)
		
		if testdatafilename != '':
			param = ('-c %g -g %g -b %d' % (results[-2],results[-1],int(useprob)))
			m = svm_train(output,[list(x) for x in input],param)

			pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(output_test,[list(x) for x in input_test],m,'-b %d' % (int(useprob)))
			ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output_test, [x[0] for x in pred_values],posclass,results[-3])

			db = array([[output_test[i],pred_values[i][0]] for i in range(len(output_test))])
			neg = len([x for x in output_test if x != posclass])
			pos = len(output_test)-neg
			pdfpages = PdfPages('%s_test.pdf' % (outputlog))
			auctest = 0
			if neg != 0 and pos != 0:
				auctest,topacctest,optaccbias,topphitest,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],True,pdfpages,'Test ROC curve',results[-3])
			pdfpages.close()
			
			print 'Test accuracy = %g' % (ACC)
			print 'Test Phi statistic = %g' % (PHI)
			print 'TP/RECALL = %g, FP = %g, PRECISION = %g' % (confusionmatrix[0,0]/pos,confusionmatrix[1,0]/neg,confusionmatrix[0,0]/(confusionmatrix[0,0]+confusionmatrix[1,0]))
			print '================================'
			print '||   ||%6d |%6d |       ||' % (m.get_labels()[0],m.get_labels()[1])
			print '================================'
			print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
			print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
			print '||----------------------------||'
			print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
			print '================================'

		
		if outputlog != '':
			fout = open(outputlog,'a')
			print >> fout, '========================='
			print >> fout, fieldnames
			print >> fout, 'train: AUC=%g,ACC=%g,PHI=%g (g=%g,c=%g,bias=%g)' % (auc,topacc,topphi,results[-1],results[-2],results[-3])
			if testdatafilename != '':
				print >> fout, 'test: ACC=%g,AUC=%g,PHI=%g' % (ACC,auctest,PHI)
			fout.close()
def main(args):
	paramsfn = args[0]
	exec(open(paramsfn,'r').read())
	
	pdfpages = PdfPages('%s.pdf' % (outputlog))
		
	output_test,input_test,fieldnames = load_data(testdatafilename)
	if binarizeoutput:
		output_test = [1 if x > boundary else -1 for x in output_test]
	
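	# Re-apply the training-time preprocessing, read back from the saved
	# min-max scale and z-score files.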
	if doscale:
		maxinput,mininput = read_scale_data(scaledatafilename)
		input_test = (input_test-mininput)/(maxinput-mininput)
		
	if donormalize:
		means,stds = read_meansigma(normalizationfilename)
		input_test = (input_test-means)/stds

						
	if choose_specific_features:
		for specific_selected_choice in specific_selected_features:
			inputfiltered_test = input_test[:,specific_selected_choice]

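			# If PCA was used at training time, reproduce it: center the test
			# features with the stored means and project them onto the stored
			# principal-component coefficients.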
			if dopca:
				coeffs,means = read_pcacoeffs(pcacoefffilename)
				temp = (inputfiltered_test-means).T
				inputfiltered_test = dot(coeffs.T,temp).T
				
			m = svm_load_model(modelfilename)
			if posclass == 'auto':
				posclass = m.get_labels()[0]
				negclass = m.get_labels()[1]
			else:
				# ensure negclass is defined when posclass comes from the parameters file
				negclass = [l for l in m.get_labels() if l != posclass][0]
				
			print posclass
			print negclass
			
#			print len(output_test)
			pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(output_test,[list(x) for x in inputfiltered_test],m,'-b %d' % (int(useprob)))
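			# Re-derive the hard labels by thresholding the decision value (or
			# class probability when useprob is set) at the tuned bias optbias,
			# rather than keeping libsvm's default cut-off.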
			pred_labels = [posclass if pred_values[i][0] > optbias else negclass for i in xrange(len(output_test))]
#			print len(pred_labels)
			ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output_test, [x[0] for x in pred_values],posclass,optbias)
			db = array([[output_test[i],pred_values[i][0]] for i in range(len(output_test))])
			
			neg = len([x for x in output_test if x != posclass])
			pos = len(output_test)-neg

			if neg != 0 and pos != 0:
				auc,topacc,optaccbias,topphi,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],True,pdfpages,'Test ROC curve',optbias)

			numpred_pos = confusionmatrix[0,0]+confusionmatrix[1,0]
			numpred_neg = confusionmatrix[0,1]+confusionmatrix[1,1]
			
			N = pos+neg
			probchance = (numpred_pos*pos+numpred_neg*neg)*1.0/(N*N)
			kappa = (ACC/100.0-probchance)*1.0/(1-probchance)
			
			print 'Test optimized Phi statistic = %g' % (PHI)
			print 'Test optimized accuracy = %g' % (ACC/100.0)
			print 'Test optimized kappa = %g' % (kappa)
			if pos == 0 or neg == 0:
				print 'TP/RECALL = %g, FP = %g, PRECISION = %g' % (nan,nan,nan)
			else:
				print 'TP/RECALL = %g, FP = %g, PRECISION = %g' % (confusionmatrix[0,0]/pos,confusionmatrix[1,0]/neg,confusionmatrix[0,0]/(confusionmatrix[0,0]+confusionmatrix[1,0]))
			print '================================'
			print '||   ||%6d |%6d |       ||' % (m.get_labels()[0],m.get_labels()[1])
			print '================================'
			print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
			print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
			print '||----------------------------||'
			print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
			print '================================'

			if outputpredictions:
				fout = open(predictionslog,'w')
				for ind in xrange(len(pred_values)):
					groundtruth = output_test[ind]
					label = pred_labels[ind]
					value = pred_values[ind][0]
					print >> fout, groundtruth,label,value,
					oneinputrow = input_test[ind,:]
					for j in xrange(len(oneinputrow)):
						print >> fout, '%d:%f' % (j+1,oneinputrow[j]),
					print >> fout
				fout.close()


				
			if outputlog != '':
				fout = open(outputlog,'a')
				print >> fout, '========================='
				print >> fout, 'SPECIFIC FIELDS:'
				print >> fout, specific_selected_choice 
				for i in specific_selected_choice:
					print >> fout, fieldnames[i],
				print >> fout
				if neg != 0 and pos != 0:
					print >> fout, 'test: ACC=%g,AUC=%g' % (ACC,auc)
				fout.close()
	else:
		

		m = svm_load_model(modelfilename)
		if posclass == 'auto':
			posclass = m.get_labels()[0]

		pred_labels, (ACC, MSE, SCC), pred_values = svm_predict(output_test,[list(x) for x in input_test],m,'-b %d' % (int(useprob)))
		ACC,PHI,confusionmatrix = mygrid.evaluations_classify(output_test, [x[0] for x in pred_values],posclass,optbias)
		db = array([[output_test[i],pred_values[i][0]] for i in range(len(output_test))])
		neg = len([x for x in output_test if x != posclass])
		pos = len(output_test)-neg
		if neg != 0 and pos != 0:
			auc,topacc,optaccbias,topphi,optphibias,top_tps_bias,top_fps = mygrid.calc_AUC(db,neg,pos,posclass,useprob,[],True,pdfpages,'Test ROC curve',optbias)
		

		numpred_pos = confusionmatrix[0,0]+confusionmatrix[1,0]
		numpred_neg = confusionmatrix[0,1]+confusionmatrix[1,1]
		
		N = pos+neg
		probchance = (numpred_pos*pos+numpred_neg*neg)*1.0/(N*N)
		testkappa = (ACC/100.0-probchance)*1.0/(1-probchance)

		print 'Test optimized Phi statistic = %g' % (PHI)
		print 'Test optimized accuracy = %g' % (ACC)
		print 'Test optimized kappa = %g' % (testkappa)
		print 'TP/RECALL = %g, FP = %g, PRECISION = %g' % (confusionmatrix[0,0]/pos,confusionmatrix[1,0]/neg,confusionmatrix[0,0]/(confusionmatrix[0,0]+confusionmatrix[1,0]))
		print '================================'
		print '||   ||%6d |%6d |       ||' % (m.get_labels()[0],m.get_labels()[1])
		print '================================'
		print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[0],confusionmatrix[0,0],confusionmatrix[0,1],pos)#confusionmatrix[0,0]+confusionmatrix[0,1])
		print '||%3d||%6g |%6g |%6g ||' % (m.get_labels()[1],confusionmatrix[1,0],confusionmatrix[1,1],neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
		print '||----------------------------||'
		print '||   ||%6g |%6g |%6g ||' % (confusionmatrix[0,0]+confusionmatrix[1,0],confusionmatrix[0,1]+confusionmatrix[1,1],pos+neg)#confusionmatrix[1,0]+confusionmatrix[1,1])
		print '================================'
		
		
		if outputpredictions:
			fout = open(testdatafilename + '_predictions.dat','w')
			for labelind in xrange(len(pred_labels)):
				print >> fout, output_test[labelind],pred_labels[labelind],pred_values[labelind][0]
			fout.close()
			
		if outputlog != '':
			fout = open(outputlog,'a')
			print >> fout, '========================='
			print >> fout, ACC
			fout.close()
	
	pdfpages.close()