def processMethod3(userid, featureCondition=1, classificationCondition=1, offsetFeatureOn=False):
    """
    User-i Device-j hack in User-i Device-k Model: iPhone 6 Plus hack iPhone 5.

    The model is fitted on the training part of the user's iPhone 5 data and
    evaluated on the test part of the same user's iPhone 6 Plus data.  Both
    devices are split with the module-level ``my_test_size`` and
    ``my_random_state`` so the test size matches method 1.

    Parameters
    ----------
    userid : user whose two devices are compared
    featureCondition, classificationCondition, offsetFeatureOn :
        forwarded unchanged to ``splitMomentDataByFeatureAndLabel``

    Returns
    -------
    float : error rate (the value returned by ``classify``)
    """
    # Device ids passed to splitMomentDataByFeatureAndLabel.
    IPHONE_6_PLUS, IPHONE_5 = 1, 2

    def loadDevice(deviceId):
        return splitMomentDataByFeatureAndLabel(
            userid, deviceId, featureCondition, classificationCondition,
            offsetFeatureOn=offsetFeatureOn)

    modelData, modelLabel = loadDevice(IPHONE_5)
    attackData, attackLabel = loadDevice(IPHONE_6_PLUS)

    # Use the same test size as method 1 for both devices.
    splitOptions = dict(test_size=my_test_size, random_state=my_random_state)
    # Only the iPhone 5 training half and the iPhone 6 Plus test half are used.
    fitData, _, fitLabel, _ = train_test_split(modelData, modelLabel, **splitOptions)
    _, evalData, _, evalLabel = train_test_split(attackData, attackLabel, **splitOptions)

    return classify(fitData, fitLabel, evalData, evalLabel,
                    kernel=my_kernel, max_iter=my_max_iteration)
def processMethod4(userid, device, featureCondition=1, classificationCondition=1, offsetFeatureOn=False):
    """
    User-i Device-j hack in User-k Device-j Model.

    A model is built from the training part of ``userid``'s data on
    ``device``.  Every other user in 1..16 is then scored against that model
    using the test part of their own data from the same device.  All splits
    use the module-level ``my_test_size`` / ``my_random_state`` (same test
    size as method 1).

    Returns
    -------
    (list of str, list) : one report line per other user, and the matching
        error rates as returned by ``testingWithModel``, both in ascending
        user-id order
    """
    def loadAndSplit(user):
        # Returns (trainData, testData, trainLabel, testLabel) for `user`.
        samples, targets = splitMomentDataByFeatureAndLabel(
            user, device, featureCondition, classificationCondition,
            offsetFeatureOn=offsetFeatureOn)
        return train_test_split(samples, targets,
                                test_size=my_test_size, random_state=my_random_state)

    ownerTrainData, _, ownerTrainLabel, _ = loadAndSplit(userid)
    ownerModel = classifyModel(ownerTrainData, ownerTrainLabel,
                               kernel=my_kernel, max_iter=my_max_iteration)

    reportLines = []
    errorRates = []
    for attacker in range(1, 17):
        if attacker == userid:
            continue
        _, attackData, _, attackLabel = loadAndSplit(attacker)
        rate = testingWithModel(attackData, attackLabel, ownerModel)
        errorRates.append(rate)
        reportLines.append(''.join(
            ['user ', str(attacker), ' hack ', str(userid), ', error rate: ', str(rate), '\n']))
    return reportLines, errorRates
def identificationDataLabeling(userid, device, featureCondition, classificationCondition, offsetFeatureOn=False):
    """
    Build a one-vs-rest identification data set for ``userid`` on ``device``.

    The samples of users 1..16 are loaded with
    ``splitMomentDataByFeatureAndLabel``.  The labels that function returns
    are replaced: every sample of ``userid`` is labelled 1 and every sample
    of any other user is labelled 0.

    Parameters
    ----------
    userid : the user treated as the positive class; must be in 1..16,
        otherwise a KeyError is raised after all users have been loaded
    device, featureCondition, classificationCondition, offsetFeatureOn :
        forwarded unchanged to ``splitMomentDataByFeatureAndLabel``

    Returns
    -------
    (data, label) : the stacked samples and the matching 0/1 labels, with
        ``userid``'s rows first and the other users following in ascending
        id order
    """
    allUsers = range(1, 17)
    data = {}
    label = {}
    for user in allUsers:
        data[user], rawLabel = splitMomentDataByFeatureAndLabel(
            user, device, featureCondition, classificationCondition,
            offsetFeatureOn=offsetFeatureOn)
        # Current user is the positive class (1); everyone else is 0.
        label[user] = [1 if user == userid else 0] * len(rawLabel)

    # Concatenate once instead of re-copying the growing array per user.
    stackOrder = [userid] + [user for user in allUsers if user != userid]
    stackedData = np.vstack([data[user] for user in stackOrder])
    stackedLabel = np.hstack([label[user] for user in stackOrder])
    return stackedData, stackedLabel
def processMethod1(userid, device, featureCondition=1, classificationCondition=1, offsetFeatureOn=False):
    """
    User-i Device-j hack in User-i Device-j Model (cross validation).

    i = 1, 2, ..., 16 and j = 1, 2.  The user's data for one device is split
    into a training and a test part with the module-level ``my_test_size`` /
    ``my_random_state``, and both parts are handed to ``classify``.

    Returns
    -------
    float : error rate (the value returned by ``classify``)
    """
    samples, targets = splitMomentDataByFeatureAndLabel(
        userid, device, featureCondition, classificationCondition,
        offsetFeatureOn=offsetFeatureOn)

    parts = train_test_split(samples, targets,
                             test_size=my_test_size, random_state=my_random_state)
    fitData, evalData, fitLabel, evalLabel = parts

    return classify(fitData, fitLabel, evalData, evalLabel,
                    kernel=my_kernel, max_iter=my_max_iteration)
def identificationMoment(userid, device, featureCondition, classificationCondition, offsetFeatureOn=False):
    """
    Train and evaluate an "is this ``userid``?" classifier on one device.

    The samples of users 1..16 are loaded with
    ``splitMomentDataByFeatureAndLabel`` and relabelled: 1 for ``userid``'s
    samples, 0 for everyone else's.  The combined set (``userid`` first, then
    the other users in ascending id order) is split with the module-level
    ``my_test_size`` / ``my_random_state`` and passed to ``classify``.
    Progress messages are printed before and after classification.

    Parameters
    ----------
    userid : the user treated as the positive class; must be in 1..16,
        otherwise a KeyError is raised after all users have been loaded
    device, featureCondition, classificationCondition, offsetFeatureOn :
        forwarded unchanged to ``splitMomentDataByFeatureAndLabel``

    Returns
    -------
    the value returned by ``classify``
    """
    allUsers = range(1, 17)
    data = {}
    label = {}
    for user in allUsers:
        data[user], rawLabel = splitMomentDataByFeatureAndLabel(
            user, device, featureCondition, classificationCondition,
            offsetFeatureOn=offsetFeatureOn)
        label[user] = [1 if user == userid else 0] * len(rawLabel)

    # Concatenate once instead of re-copying the growing array per user.
    stackOrder = [userid] + [user for user in allUsers if user != userid]
    allData = np.vstack([data[user] for user in stackOrder])
    allLabel = np.hstack([label[user] for user in stackOrder])

    trainingData, testData, trainingLabel, testLabel = train_test_split(
        allData, allLabel, test_size=my_test_size, random_state=my_random_state)
    print('preload finished.')

    err = classify(trainingData, trainingLabel, testData, testLabel,
                   kernel=my_kernel, max_iter=my_max_iteration)
    print('clssify finished.')
    return err
import numpy
from sknn.mlp import Classifier, Layer
from sklearn.cross_validation import train_test_split

# Which recording to load and how to slice it.
userid, device = 1, 1
featureCondition, classificationCondition = 16, 1
offsetFeatureOn = False

# Train/test split settings.
my_test_size, my_random_state = 0.3, 42

data, label = splitMomentDataByFeatureAndLabel(
    userid, device, featureCondition, classificationCondition,
    offsetFeatureOn=offsetFeatureOn)

# Cast features to float and labels to int before handing them to the network.
data, label = data.astype(float), label.astype(int)

trainingData, testData, trainingLabel, testLabel = train_test_split(
    data, label,
    test_size=my_test_size,
    random_state=my_random_state)

# Two-layer network: a 100-unit "Softmax" layer followed by a "Softmax" output layer.
# NOTE(review): `pieces` is documented by sknn for Maxout layers - confirm it is
# intended on a Softmax layer.
nn = Classifier(
    layers=[
        Layer("Softmax", units=100, pieces=2),
        Layer("Softmax"),
    ],
    learning_rate=0.001,
    n_iter=10000)
nn.fit(trainingData, trainingLabel)
def plotROC(userid, device, featureCondition, classificationCondition, offset=False, noisyOn=True, noisyNum=11):
    """
    Plot one ROC curve per class for a one-vs-rest SVM and save it as a PNG.

    The data comes from ``splitMomentDataByFeatureAndLabel``; labels are
    binarized against the class list selected by ``classificationCondition``,
    half of the samples are held out for testing, and the held-out decision
    scores are turned into ROC curves with their AUC shown in the legend.

    Parameters
    ----------
    userid, device, featureCondition, classificationCondition :
        forwarded to ``splitMomentDataByFeatureAndLabel``;
        ``classificationCondition`` also selects the class list
        (1 -> '0','1'; 2 -> '2','3'; 3 -> '0','1','2','3'; anything else -> '0','1')
    offset : forwarded as ``offsetFeatureOn``
    noisyOn : when true, random normal columns are appended to the features
        to make the problem harder
    noisyNum : number of noise columns added per original feature column

    Returns
    -------
    None.  Returns early, without plotting, when the output file already exists.
    """
    # NOTE: the per-user output path (../result/img/roc/...) is disabled in the
    # original; every call targets this one file, so once it exists all
    # further calls return immediately.
    fileName = 'result.png'
    if os.path.exists(fileName):
        print('finish ' + fileName)
        return

    data, label = splitMomentDataByFeatureAndLabel(
        userid, device, featureCondition, classificationCondition,
        offsetFeatureOn=offset)

    # Binarize the output against the classes of this classification condition.
    classesByCondition = {
        1: ['0', '1'],
        2: ['2', '3'],
        3: ['0', '1', '2', '3'],
    }
    classes = classesByCondition.get(classificationCondition, ['0', '1'])  # default: condition 4
    label = label_binarize(label, classes=classes)
    n_classes = label.shape[1]

    # Add noisy features to make the problem harder.
    if noisyOn:
        random_state = np.random.RandomState(my_random_state)
        n_samples, n_features = data.shape
        data = np.c_[data, random_state.randn(n_samples, noisyNum * n_features)]

    # Shuffle and split into training and test sets.
    trainingData, testData, trainingLabel, testLabel = train_test_split(
        data, label, test_size=0.5, random_state=my_random_state)

    # Learn to predict each class against the others.
    classifier = OneVsRestClassifier(svm.SVC(
        kernel=my_kernel, probability=True,
        random_state=my_random_state, max_iter=my_max_iteration))
    label_score = classifier.fit(trainingData, trainingLabel).decision_function(testData)

    # With a two-class list the binarized label has a single column, and
    # scikit-learn >= 0.19 then returns a 1-D score array; force 2-D so the
    # per-class column indexing below works in both cases.
    label_score = np.asarray(label_score)
    if label_score.ndim == 1:
        label_score = label_score.reshape(-1, 1)
    print('decision success.')

    # Compute ROC curve and ROC area for each class.
    fpr = {}
    tpr = {}
    roc_auc = {}
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(testLabel[:, i], label_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Plot all ROC curves plus the chance diagonal.
    plt.figure()
    for i in range(n_classes):
        strLabel = 'ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i])
        plt.plot(fpr[i], tpr[i], label=strLabel, lw=3)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")

    plt.savefig(fileName, dpi=72)
    print('finish ' + fileName)
    plt.close('all')
from loaddata import loadUserData from loaddata import splitMomentDataByFeature from loaddata import splitMomentDataByLabel from loaddata import splitMomentDataByFeatureAndLabel userid=1 device=1 featureCondition=3 classificationCondition=1 offsetFeatureOn=True batch_size = 45 my_test_size = 0.3 my_random_state = 42 data, label = splitMomentDataByFeatureAndLabel(userid, device, featureCondition, classificationCondition, offsetFeatureOn=offsetFeatureOn) trainingData, testData, trainingLabel, testLabel = train_test_split(data, label, test_size=my_test_size, random_state=my_random_state) def plot3DLabel(data, label, trainLabel): print data.shape print label fig = plt.figure() ax = fig.add_subplot(211, projection='3d') x = [float(value)/736 for value in data[:,0]] y = [float(value)/414 for value in data[:,1]] z = [float(value) for value in data[:,2]] label = [1 if value=='1' else 0 for value in label] ax.scatter(x,y,z,c=label, marker='o') ax.set_xlabel('X') ax.set_ylabel('Y')