Example #1
0
def main(maxiter,featureset):
    import sys
    print 'Number of arguments:', len(sys.argv), 'arguments.'
    print 'Argument List:', str(sys.argv)
    maxiter = int(sys.argv[-2])
    featureset = int(sys.argv[-1])
    sys.path
    sys.path.append(".") 
    import Solution as sl
    import numpy as np
    learner = sl.perceptron(maxiter,featureset)
    labels = []
    labels.append(learner['otrain'][:,0])
    labels.append(learner['ptrain'])
    match = sum(learner['otrain'][:,0] == learner['ptrain'])
    #print '*******************'
    #print 'Training accuracy is: {0:.3f}%'.format(match*1.0/len(learner['otrain'])*100.0)
    #print '*******************'

    #########################################
    ##### now run validation
    vdata = np.array(readfile("./validation.txt"))
    nvdata = vdata.shape[0]
    # get rid of ? missing data
    contatr = np.array([1,2,7,10,13,14])
    attrlist = np.array(range(len(vdata[0])))
    for i in contatr:
        dum = vdata[vdata[:,i]!=np.array('?'),i]
        vdata[vdata[:,i]==np.array('?'),i] = np.mean(dum.astype(np.float))
    disatr = list(set(attrlist)-set(contatr))
    for i in disatr:
        dum = max(set(list(vdata[:,i])), key=list(vdata[:,i]).count)
        vdata[vdata[:,i]==np.array('?'),i] = np.array(dum)
    print "is there missing data: ", sum(sum(vdata[:,:15]==np.array('?')))
    ori_v_label = vdata[:,-1];
    ori_v_label_bool = np.zeros((vdata.shape[0],1))-1
    ori_v_label_bool[ori_v_label==np.array('+')] = 1
    newdatafeatset = sl.getfeature(learner['thres'],vdata[:,:15],featureset,disatr,contatr) # no label col
    predv = np.sign(np.sum(np.tile(learner['alphaa']*learner['y'],(1,nvdata))*np.dot(learner['X'],newdatafeatset.T),axis=0))
    match = sum(predv == ori_v_label_bool[:,0])
    labels.append(ori_v_label_bool[:,0])
    labels.append(predv)
    #print '*******************'
    #print 'validation accuracy is: {0:.3f}%'.format(match*1.0/nvdata*100.0)
    #print '*******************'


    ##########################################
    ##### now run test
    tdata = np.array(readfile("./test.txt"))
    ntdata = tdata.shape[0]
    # get rid of ? missing data
    contatr = np.array([1,2,7,10,13,14])
    attrlist = np.array(range(len(tdata[0])))
    for i in contatr:
        dum = tdata[tdata[:,i]!=np.array('?'),i]
        tdata[tdata[:,i]==np.array('?'),i] = np.mean(dum.astype(np.float))
    disatr = list(set(attrlist)-set(contatr))
    for i in disatr:
        dum = max(set(list(tdata[:,i])), key=list(tdata[:,i]).count)
        tdata[tdata[:,i]==np.array('?'),i] = np.array(dum)
    print "is there missing data: ", sum(sum(tdata[:,:15]==np.array('?')))
    ori_t_label = tdata[:,-1];
    ori_t_label_bool = np.zeros((tdata.shape[0],1))-1
    ori_t_label_bool[ori_v_label==np.array('+')] = 1
    newdatafeatset = sl.getfeature(learner['thres'],tdata[:,:15],featureset,disatr,contatr) # no label col
    predt = np.sign(np.sum(np.tile(learner['alphaa']*learner['y'],(1,ntdata))*np.dot(learner['X'],newdatafeatset.T),axis=0))
    match = sum(predt == ori_t_label_bool[:,0])
    #print '*******************'
    #print 'test accuracy is: {0:.3f}%'.format(match*1.0/ntdata*100.0)
    #print '*******************'
    labels.append(ori_t_label_bool[:,0])
    labels.append(predt)

    if labels == None or len(labels) != 6:
	print '\nError: Perceptron Return Value.\n' 
    else:
	eval(labels[0],labels[1],labels[2],labels[3],labels[4],labels[5])
Example #2
0
# Scratch driver: make the homework directory importable, train once with
# fixed settings, then run Check.py under a substitute command line.
import sys
sys.path.append("/home/he72/STAT590/hw2")
import Solution as sl
import numpy as np

# Train once and keep the learner's 'otrain' and 'ptrain' entries together.
learner = sl.perceptron(10,1)
labels = [learner['otrain'], learner['ptrain']]

# Replace the process arguments before executing Check.py in this namespace.
# NOTE(review): presumably Check.py reads its settings from sys.argv -- confirm.
sys.argv = ['5','2']
execfile('./Check.py')

import os
import sys
sys.path.append("/home/he72/STAT590/hw2")
import numpy as np
import Check as Ck
data = np.array(Ck.readfile("/home/he72/STAT590/hw2/train.txt"))

# get rid of ? missing data
contatr = np.array([1,2,7,10,13,14]) 
attrlist = np.array(range(len(data[0])))
for i in contatr:
    dum = data[data[:,i]!=np.array('?'),i]
    data[data[:,i]==np.array('?'),i] = np.mean(dum.astype(np.float))
    disatr = list(set(attrlist)-set(contatr))
for i in disatr:
        dum = max(set(list(data[:,i])), key=list(data[:,i]).count)
        data[data[:,i]==np.array('?'),i] = np.array(dum)
print "is there missing data: ", sum(sum(data[:,:15]==np.array('?')))