def train_svm(params, suffix, train_X, train_Y, test_X, test_Y):
    """Fit an SVC with the given hyper-parameters and report its accuracy.

    Args:
        params: Mapping that provides the ``'C'`` and ``'kernel'`` values
            forwarded to ``SVC``.
        suffix: Not used by this function; kept so the signature stays the
            same for existing callers.
        train_X: Training samples.
        train_Y: Training labels.
        test_X: Held-out samples.
        test_Y: Held-out labels.

    Returns:
        A ``(test_score, None)`` tuple, where ``test_score`` is
        ``model.score(test_X, test_Y)``.  The second element is always
        ``None``.

    Raises:
        KeyError: If ``params`` lacks ``'C'`` or ``'kernel'``.
    """
    C = params['C']
    kernel = params['kernel']

    # probability=True is deliberately not requested: it makes fit() run an
    # additional internal cross-validation to calibrate probabilities, and
    # nothing here (score() only) ever reads them.
    model = SVC(gamma='scale', C=C, kernel=kernel)
    print("Params C:", C, "kernel:", kernel)

    model.fit(train_X, train_Y)
    print("Train score", model.score(train_X, train_Y))

    test_score = model.score(test_X, test_Y)
    print("Test score", test_score)
    return test_score, None
def _build_accelerometer_windows(data, window):
    """Cut the recording into consecutive, non-overlapping windows.

    Each sample is ``window`` successive values of the ``'y'`` column.  Its
    label is +1 when the ``'gt'`` value of the window's first row is
    ``'sit'`` and -1 otherwise.  Trailing rows that do not fill a whole
    window are dropped so that every sample has the same number of features.

    Args:
        data: DataFrame with a ``'y'`` column and a ``'gt'`` column.
        window: Number of rows per window; must be positive.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, window)`` and ``y`` has
        shape ``(n_windows,)``.

    Raises:
        ValueError: If ``window`` is not positive or the data holds fewer
            than ``window`` rows.
    """
    if window <= 0:
        raise ValueError("window must be a positive number of rows")

    signal = data['y'].values
    activity = data['gt'].values

    n_windows = len(signal) // window
    if n_windows == 0:
        raise ValueError(
            "need at least %d rows to build one window, got %d"
            % (window, len(signal))
        )

    used_rows = n_windows * window
    X = signal[:used_rows].reshape(n_windows, window)
    # The label of a window is decided by its first row only.
    y = np.array(
        [1 if str(activity[start]) == 'sit' else -1
         for start in range(0, used_rows, window)],
        dtype=int,
    )
    return X, y


def phoneAccelerometerISVM(csv_path="./Train_Phone-Acc-nexus4_1-a.csv",
                           window=500):
    """Train and evaluate a sitting-vs-other SVM on phone accelerometer data.

    The CSV is cut into fixed-width windows of the ``'y'`` column, an SVC is
    fitted on all windows, and a second SVC is fitted on a stratified 90%
    split and scored on the remaining 10%.  Progress and metrics are printed;
    nothing is returned.

    Args:
        csv_path: CSV file to load; it must provide ``'y'`` and ``'gt'``
            columns.
        window: Number of consecutive rows that form one sample.

    Raises:
        ValueError: If the file holds fewer than ``window`` rows or
            ``window`` is not positive.
    """
    print("Loading data...")
    data = pd.read_csv(csv_path)
    print("Done!")

    # Parse data and make a sit vs. not-sitting classification using an SVM.
    print("Finding time series windows indexes for each class kind...")
    X, y = _build_accelerometer_windows(data, window)
    print("Done!")

    # Fit on every window.  This model is not used afterwards; the step is
    # kept so the printed progress still matches the work being done.
    print("Training SVM on all data accelerometer data...")
    SVC().fit(X, y)
    print("Done!")

    # Hold-out evaluation.  The split is unseeded, so scores vary per run.
    print("Training SVM on accelerometer training only data...")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=0.1)
    clfs = SVC()
    clfs.fit(X_train, y_train)

    # Predict once and reuse the result for every metric below.
    yhat = clfs.predict(X_test)
    print("Abs Error = %f"%( calculateTotalAbsoluteError(yhat, y_test)/len(yhat)))
    print("Test data mean accuracy SVM score: %f"%clfs.score(X_test, y_test))

    f1_positive = f1_score(y_test, yhat, pos_label=1, average='binary')
    print("Test data f1 score for class +1: %f" % (f1_positive))
    print("Done!")
if Y_label != 'NULL' or random.random() > 0: if Y_label == event_name: Y = 1 else: Y = 0 if i == 0: X_all = X Y_all = Y i = 1 else: X_all = np.vstack((X_all, X)) Y_all = np.append(Y_all, Y) i += 1 # print (i) # print (np.sum(X_all, axis = 1)) # print(X_all, Y_all) clf = SVC(kernel=chi2_kernel) # clf = SVC() clf.fit(X_all, Y_all) print(clf.score(X_all, Y_all)) print(clf.predict(X_all)) fread.close() cPickle.dump(clf, open(output_file, "wb")) print 'SVM trained successfully for event %s!' % (event_name)
# Map both splits through embed() with triplet_model before classification.
trn_embedding = embed(X_trn, triplet_model)
val_embedding = embed(X_val, triplet_model)

# probability=True is needed for the predict_proba() calls below.
clf = SVC(probability=True)
clf.fit(trn_embedding, Y_trn)

# Predict once on the validation embeddings and reuse the results.
val_pred = clf.predict(val_embedding)
val_proba = clf.predict_proba(val_embedding)

print(clf.score(val_embedding, Y_val))
print(val_proba)
# ROC AUC needs a continuous score, not thresholded labels: use the
# probability of the positive class (clf.classes_[1]).  This assumes a binary
# problem, which the earlier call with 1-D predictions required as well.
print(roc_auc_score(Y_val, val_proba[:, 1]))
print(classification_report(Y_val, val_pred, digits=4))

# Each directory entry is reduced to its stem by cutting the last 8
# characters (presumably a fixed-length extension -- confirm against the
# contents of ALL_FILES).
all_files = [x[:-8] for x in os.listdir(ALL_FILES)]

# Load one pickled feature object per stem, closing every file after reading.
# NOTE: pickle can execute arbitrary code on load; only use this with feature
# files from a trusted source.
X = []
for feature_stem in all_files:
    feature_path = os.path.join(FEATURE_PATH, feature_stem + ".fkmeans")
    with open(feature_path, "rb") as feature_file:
        X.append(pickle.load(feature_file, encoding='latin1'))

# Class probabilities for every loaded feature object.
proba = clf.predict_proba(embed(np.array(X), triplet_model))
test_labels = []

# Route every email to the train or the test split: ids divisible by 5 are
# held out for testing, all others are used for training.
for email in emails:
    email_id = email.id
    prefix_train_pos = 'email_' + str(email_id)
    if email_id % 5 == 0:
        bucket_arrays, bucket_labels = test_arrays, test_labels
    else:
        bucket_arrays, bucket_labels = train_arrays, train_labels
    bucket_arrays.append(model.docvecs[prefix_train_pos])
    bucket_labels.append(int(email.label))

# Fit on the training split and report the score on the held-out split.
classifier = SVC()
train_matrix = numpy.array(train_arrays)
train_targets = numpy.array(train_labels)
classifier.fit(train_matrix, train_targets)

test_matrix = numpy.array(test_arrays)
test_targets = numpy.array(test_labels)
print("Overall score is %f." % classifier.score(test_matrix, test_targets))

# Second pass over the held-out emails: record which ones were classified
# correctly and, for the misses, the (id, predicted, actual) triple.
corrects = []
wrongs = []
for email in emails:
    email_id = email.id
    prefix_train_pos = 'email_' + str(email_id)
    if email_id % 5 != 0:
        continue
    prediction = classifier.predict([model.docvecs[prefix_train_pos]])[0]
    actual = int(email.label)
    if prediction == actual:
        corrects.append(email.id)
    else:
        wrongs.append((email.id, prediction, actual))