def run(input_dir, num_samples_per_site, onevone=True):
    """Cross-validate linear- and RBF-kernel SVM classifiers over a C sweep.

    input_dir -- directory holding the per-site capture data
    num_samples_per_site -- how many samples to load for each target site
    onevone -- passed through to classify(); presumably selects one-vs-one
               multiclass strategy (TODO confirm against classify())
    """
    # BUG FIX: the original body immediately re-read input_dir and
    # num_samples_per_site from sys.argv, silently discarding the
    # caller-supplied arguments. The parameters are now honored.
    target_sites = get_target_sites(input_dir)
    # Assign each target site a numeric class label.
    labels = {s: i for i, s in enumerate(target_sites)}
    X, Y = load_feature_vectors(input_dir, num_samples_per_site, labels)
    # 6-fold cross-validation splits; each entry has "train"/"test" pairs.
    iters = select_cross_validation_subsets(X, Y, 6)
    # SVM regularization parameters to sweep.
    crange = [1.0, 100.0]
    for c in crange:
        print("C=%f" % c)
        print("Linear kernel:")
        for d in iters:
            trainX, testX = scale(d["train"][0], d["test"][0])
            classify(trainX, d["train"][1], testX, d["test"][1], onevone, c)
        print("RBF kernel:")
        for d in iters:
            trainX, testX = scale(d["train"][0], d["test"][0])
            classify(trainX, d["train"][1], testX, d["test"][1], onevone, c, 'rbf')
def anomaly_detection(labels, Xnew, Y, testXnew, testY):
    """Per-class one-class anomaly detection.

    For each class, fit an anomaly detector on that class's training
    vectors only, then report how many test points it classifies
    correctly as normal (same class) vs. anomalous (other classes).

    labels -- site -> class-index mapping (only its length is used)
    Xnew, Y -- training vectors and their class labels
    testXnew, testY -- test vectors and their class labels
    """
    print("Anomaly detection")
    for test_class in range(len(labels)):
        # Training data restricted to the current class.
        anomX = [x for i, x in enumerate(Xnew) if Y[i] == test_class]
        # Translate so all coordinates are positive.
        minD = []
        for d in range(len(anomX[0])):
            minD.append(min(x[d] for x in anomX))
        anomX = translate(minD, anomX)
        anomX = scale(anomX)
        rho, alphas = AnomDet_fit(anomX, 0.1, rbf)
        # Apply the identical translation/scaling to the test set.
        test = translate(minD, testXnew)
        test = scale(test)
        num_correct = [0, 0]   # [normal correct, anomaly correct]
        predictions = [0, 0]   # [normal total, anomaly total]
        for i, x in enumerate(test):
            # Renamed from `classify`: the original local shadowed the
            # module-level classify() helper used elsewhere in this file.
            prediction = AnomDet_classify(x, alphas, rho, anomX, rbf)
            if testY[i] == test_class:
                predictions[0] += 1
                # 0.0 appears to mean "inside the class boundary" (normal)
                # -- TODO confirm against AnomDet_classify's contract.
                if prediction == 0.0:
                    num_correct[0] += 1
            else:
                predictions[1] += 1
                if prediction != 0.0:
                    num_correct[1] += 1
        print("Test class %d. Normal correct: %d/%d, anomaly correct: %d/%d" % (
            test_class, num_correct[0], predictions[0],
            num_correct[1], predictions[1]))
def multiclass_svm(X, Y, testX, testY, labels, c=1.0):
    """Train a multiclass SVM on (X, Y) and report accuracy on (testX, testY).

    NOTE(review): a second function with this same name is defined later in
    the file and shadows this one at import time -- confirm which is intended.

    c -- SVM regularization parameter passed to SVM_fit.
    """
    X, testX = scale(X, testX)
    # Recreate the scratch directory used to hand training data to SVM_fit.
    try:
        shutil.rmtree("tmp_x")
    except OSError:
        # Was a bare `except:`; only a missing directory is expected here.
        print("No tmp directory to delete")
    os.mkdir("tmp_x")
    # One JSON-encoded feature vector per line.
    with open("tmp_x/x.dat", mode="w+") as out:
        for x in X:
            out.write(json.dumps(x.tolist()))
            out.write("\n")
    thetas, bs, slacks = SVM_fit("tmp_x/x.dat", Y, len(labels), len(X[0]), c)
    num_correct = 0
    for i, x in enumerate(testX):
        if SVM_classify(x, thetas, bs) == testY[i]:
            num_correct += 1
    print("Num correct: %d/%d" % (num_correct, len(testY)))
def multiclass_svm(Xnew, testXnew, Y, testY, labels):
    """Train a multiclass SVM (fixed C=0.05) and report test accuracy.

    Writes both train and test vectors to disk, drops the in-memory
    copies before fitting, then streams the test set back line by line
    to keep peak memory low.

    NOTE(review): this redefines multiclass_svm from earlier in the file,
    shadowing that version -- confirm the duplication is intentional.
    """
    print("Classifying with a multiclass SVM")
    Xnew, testXnew = scale(Xnew, testXnew)
    # Recreate the scratch directory used to hand data to SVM_fit.
    try:
        shutil.rmtree("tmp_x")
    except OSError:
        # Was a bare `except:`; only a missing directory is expected here.
        print("No tmp directory to delete")
    os.mkdir("tmp_x")
    # One JSON-encoded feature vector per line, train and test separately.
    with open("tmp_x/x.dat", mode="w+") as out:
        for x in Xnew:
            out.write(json.dumps(x.tolist()))
            out.write("\n")
    with open("tmp_x/xtest.dat", mode="w+") as out:
        for x in testXnew:
            out.write(json.dumps(x.tolist()))
            out.write("\n")
    d = len(Xnew[0])
    # Release the large in-memory copies; the data now lives on disk.
    del Xnew
    del testXnew
    thetas, bs, slacks = SVM_fit("tmp_x/x.dat", Y, len(labels), d, 0.05)
    num_correct = 0
    # FIX: the original never closed this file handle; `with` guarantees it.
    with open("tmp_x/xtest.dat") as tests:
        for i, l in enumerate(tests):
            x = json.loads(l)
            if SVM_classify(x, thetas, bs) == testY[i]:
                num_correct += 1
    print("Num correct: %d/%d" % (num_correct, len(testY)))