Example #1
def main():
    data = pickle.load(open('../submodular_20.pickle', 'rb'))  # binary mode needed under Python 3
    train, train_labels, test, test_labels = Load20NG()
    vectorizer = sklearn.feature_extraction.text.CountVectorizer(binary=True,
                                                                 lowercase=False)
    vectorizer.fit(train + test)
    train_vectors = vectorizer.transform(train)
    test_vectors = vectorizer.transform(test)
    svm = sklearn.svm.SVC(probability=True, kernel='rbf', C=10, gamma=0.001)
    svm.fit(train_vectors, train_labels)
    
    json_ret = {}
    json_ret['class_names'] = ['Atheism', 'Christianity']
    json_ret['instances'] = []
    explanations = data['explanations']['20ng']['svm']
    idxs = data['submodular_idx']['20ng']['svm'][:10]
    for i in idxs:
        json_obj = {}
        json_obj['id'] = i
        idx = i
        instance = test_vectors[idx]
        json_obj['true_class'] = test_labels[idx]
        json_obj['c1'] = {}
        json_obj['c1']['predict_proba'] = list(svm.predict_proba(test_vectors[idx])[0])
        exp = explanations[idx]
        json_obj['c1']['exp'] = exp 
        json_obj['c1']['data'] = get_pretty_instance(test[idx], exp, vectorizer)
        json_ret['instances'].append(json_obj)
    import json
    with open('static/exp2_local.json', 'w') as f:
        f.write('data = %s' % json.dumps(json_ret))
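Load20NG and get_pretty_instance come from the surrounding project and are not shown; as a rough, hypothetical stand-in, Load20NG could be built on sklearn's fetch_20newsgroups restricted to the two classes named above:

from sklearn.datasets import fetch_20newsgroups

def Load20NG():
    # hypothetical stand-in; the original helper may differ
    cats = ['alt.atheism', 'soc.religion.christian']
    train = fetch_20newsgroups(subset='train', categories=cats)
    test = fetch_20newsgroups(subset='test', categories=cats)
    return list(train.data), train.target, list(test.data), test.target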
Example #2
def predict_sentiment(classifier, text):
    if classifier not in ['ffnn', 'svm', 'rf']:
        return jsonify(success=False, error="classifier_not_found")

    tokens = tokenize(text)
    doc_vector = d2v_model.infer_vector(tokens)

    # each lambda returns the probability vector for the single document;
    # predict_proba expects a 2-D array, hence the [v] wrapping
    prediction = {
        'svm': lambda v: svm.predict_proba([v])[0],
        'rf': lambda v: rf.predict_proba([v])[0],
        'ffnn': lambda v: ffnn.predict(np.array([v]), batch_size=1)[0],
    }[classifier](doc_vector)
    polarity = 'positive' if prediction[0] > prediction[1] else 'negative'

    return_json = {
        'success': True,
        'polarity': polarity,
        'score': prediction.tolist(),
        'word2vec_hit_rate': 1.00
    }

    return jsonify(**return_json)
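Outside the Flask route, the same dispatch can be exercised directly; here d2v_model is assumed to be a trained gensim Doc2Vec and svm a fitted probabilistic classifier (both hypothetical stand-ins for the module's globals):

tokens = tokenize("great acting and a solid plot")
doc_vector = d2v_model.infer_vector(tokens)      # gensim Doc2Vec API
proba = svm.predict_proba([doc_vector])[0]       # wrap: predict_proba wants 2-D input
print('positive' if proba[0] > proba[1] else 'negative')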
Example #3
def predict_proba(dataset):
    svm = joblib.load("hog_model/svm%d-%d-%d.model" % (WORD, S, E))

    test_data = np.float32([]).reshape(0, WORD)
    test_target = []

    dictIdx = 0
    for name in list(dataset.keys())[S:E]:  # dict views are not sliceable in Python 3
        print(name)
        for i in dataset[name]:
            test_data = np.append(test_data, i.reshape(1, -1), axis=0)
        test_target += [dictIdx] * len(dataset[name])
        dictIdx += 1

    p = svm.predict_proba(test_data)
    print(p)
    p = p.argsort()  # per-row class indices, sorted by ascending probability
    print(p)
    print(test_target)
    l = 0
    right = 0
    for name in list(dataset.keys())[S:E]:
        r = 0
        for i in range(l, l + len(dataset[name])):
            if test_target[i] in p[i][E - 5:E]:  # hit if the true label is among the top five
                r += 1
        right += r
        print("%s : %d/%d" % (name, r, len(dataset[name])))
        l += len(dataset[name])
    print("%d / %d" % (right, l))
Example #4
def move_particle(image, centroid, svm, radius, part_width, threshold):
    img = image.copy()
    img = np.pad(img, pad_width=((radius, radius), (radius, radius)), mode='median')
    particles = np.empty((0, part_width * part_width))
    # TODO: can this loop be vectorized for speed?

    x = centroid[0]  # x is width (column), y is height (row)
    y = centroid[1]

    if radius == 0:
        print("radius is zero")
        exit()
    for i in range(2 * radius + 1):
        for j in range(2 * radius + 1):
            left = x + i - int(part_width / 2)
            right = left + part_width
            top = y + j - int(part_width / 2)
            bottom = top + part_width
            # index rows (y) first, then columns (x)
            segment = img[top:bottom, left:right]

            if segment.shape[0] == part_width and segment.shape[1] == part_width:
                segment = segment.reshape((1, -1))
                particles = np.append(particles, segment, axis=0)
            else:
                # out-of-bounds window: pad with a sentinel row of -1s
                particles = np.append(particles, np.zeros((1, part_width * part_width)) - 1, axis=0)
    if particles.shape[0] != 0:
        pred_prob = svm.predict_proba(particles)
        # here assume the correct (particle) label of pred_prob is at 1 along axis 1
        best_fit_index = np.argmax(pred_prob[:, 1])
        best_fit_prob = pred_prob[best_fit_index, 1]
        bxx = int(best_fit_index / (2 * radius + 1))
        byy = int(best_fit_index % (2 * radius + 1))

        # suppress the best fit to find the second-best window
        pred_prob[best_fit_index, 1] = -1
        second_fit_index = np.argmax(pred_prob[:, 1])
        second_fit_prob = pred_prob[second_fit_index, 1]
        sxx = int(second_fit_index / (2 * radius + 1))
        syy = int(second_fit_index % (2 * radius + 1))

        if second_fit_prob >= threshold:
            return np.array([2, bxx + x - radius, byy + y - radius,
                             sxx + x - radius, syy + y - radius]).astype(int)
        elif best_fit_prob >= threshold:
            return np.array([1, bxx + x - radius, byy + y - radius, -1, -1]).astype(int)
        else:
            return np.array([0, -1, -1, -1, -1]).astype(int)
    else:
        return np.array([0, -1, -1, -1, -1]).astype(int)
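A hedged usage sketch: the classifier is assumed to be a binary SVC trained on flattened part_width x part_width patches with probability=True, so that column 1 of predict_proba is the particle class; the returned vector packs [n_matches, best_x, best_y, second_x, second_y]. Everything below is toy data, not the project's:

import numpy as np
from sklearn.svm import SVC

rng = np.random.default_rng(0)
image = rng.random((64, 64))                 # toy image
train_patches = rng.random((40, 5 * 5))      # flattened 5x5 patches
train_labels = rng.integers(0, 2, size=40)   # 0 = background, 1 = particle
clf = SVC(probability=True).fit(train_patches, train_labels)

result = move_particle(image, centroid=(32, 32), svm=clf,
                       radius=3, part_width=5, threshold=0.9)
n_matches, bx, by, sx, sy = result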
Example #5
def runSVM(x):
    # load the pickled SVM
    with open("svm.pkl", 'rb') as f:
        svm = pickle.load(f)

    # normal classification with SVM
    y_pred = svm.predict(x)
    y_prob = svm.predict_proba(x)
    y_prob = y_prob[:, 1]  # keep the positive-class column
Example #6
def classify(test_features, svm):
    max_prob = 0
    max_choice = 0
    for j in range(4):
        instance = test_features[j]
        p = svm.predict_proba(instance)[0][0]
        if p > max_prob:
            max_prob = p
            max_choice = j
    return max_choice
Example #7
def svm(x_train, y_train, x_test, threshold=0.5):
    # SVM with a linear kernel (similar to LogisticRegression)
    from sklearn import svm
    svm = svm.SVC(kernel='linear', probability=True)  # probability=True is required for predict_proba
    print('Start training')
    svm.fit(x_train, y_train)
    print('training finished')
    print('start prediction')
    svm_predict = svm.predict_proba(x_test)
    print('prediction finished')
    # threshold = 0.0363  # decision threshold for class imbalance (threshold = positive samples / negative samples)
    svm_predicted = (svm_predict[:, 1] >= threshold).astype('int')
    dp.save_result(svm_predicted, 'SVM_result13_threshold00363')  # save prediction
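The commented-out 0.0363 is the imbalance-aware cut-off; a tiny illustration (toy numbers, not the project's data) of how lowering the threshold changes which samples pass:

import numpy as np

probs = np.array([0.01, 0.03, 0.20, 0.70])    # toy positive-class probabilities
for t in (0.5, 0.0363):
    print(t, (probs >= t).astype('int'))       # 0.5 -> [0 0 0 1]; 0.0363 -> [0 0 1 1]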
Example #8
def predict_svm(X, svm, std_scaler=None, pca=None, probability=True):
    # Apply PCA if available
    if pca is not None:
        X = pca.transform(X)
        X = np.float32(X)

    # Standardize data
    if std_scaler is None:
        X_std = X
    else:
        X_std = std_scaler.transform(X)

    # Predict the labels
    if probability:
        return svm.predict_proba(X_std)
    else:
        return svm.predict(X_std)
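A minimal sketch of fitting the objects this helper expects; the data and hyper-parameters below are illustrative only:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X_train = np.random.rand(100, 32)
y_train = np.random.randint(0, 2, 100)

pca = PCA(n_components=8).fit(X_train)
X_red = np.float32(pca.transform(X_train))
std_scaler = StandardScaler().fit(X_red)
clf = SVC(probability=True).fit(std_scaler.transform(X_red), y_train)

proba = predict_svm(np.random.rand(5, 32), clf, std_scaler=std_scaler, pca=pca)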
Example #9
def predict(dataset):
    def result(p, test_target, dataset, num):
        print("###############################")
        # top-num accuracy
        l = 0
        right = 0
        for name in list(dataset.keys())[S:E]:
            r = 0
            for i in range(l, l + len(dataset[name])):
                if test_target[i] in p[i][E - num - 1:E - 1]:
                    r += 1
            right += r
            print("%s : %d/%d" % (name, r, len(dataset[name])))
            l += len(dataset[name])
        print("%d / %d" % (right, l))
        print("###############################")

    svm = joblib.load("model/%d_svm%d-%d-%d.model" % (V, WORD, S, E))
    clf = joblib.load("model/%d_vocab%d-%d-%d.pkl" % (V, WORD, S, E))
    test_data = np.float32([]).reshape(0, WORD)
    test_target = []

    target = {}
    for i, name in enumerate(list(dataset.keys())):
        target[name] = i

    for name in list(dataset.keys())[S:E]:
        idx = target[name]
        print(name)
        for i in dataset[name]:
            featVec = getFeatVec(i, clf)
            test_data = np.append(test_data, featVec, axis=0)
        test_target += [idx for i in range(len(dataset[name]))]

    p = svm.predict_proba(test_data)
    p = p.argsort()
    for num in (5, 4, 3, 2, 1):
        result(p, test_target, dataset, num)
Example #10
def predict(df, hate=10, threshold=0.6):
    '''Take a dataset whose comments are labelled "tweet" and predict their hatefulness.
    Optional parameters are hate (how many top comments to return) and threshold.
    Returns 4 elements in this order [first_hate, count_hate, count_comments, percentage]:
    - first_hate: first hate comments as a list of dicts
    - count_hate: total number of hate comments
    - count_comments: total number of comments and subcomments
    - percentage: round(count_hate / count_comments * 100, 2) '''
    test = pd.DataFrame(df)
    test_tweet = clean_tweets(test["tweet"])

    test["clean_tweet"] = test_tweet

    x_test_vec = vectorizer.transform(test_tweet)

    y_pred_svm = svm.predict(x_test_vec)
    test["prediction"] = y_pred_svm

    # predict_proba columns follow svm.classes_; column 1 is the hate class (label 1)
    y_pred_proba = svm.predict_proba(x_test_vec)
    test["proba"] = y_pred_proba[:, 1]
    test_sort = test.sort_values(by=["proba"], ascending=False)

    hateful_comments = test_sort[(test_sort["prediction"] == 1)
                                 & (test_sort["proba"] >= threshold)]

    count_hate = len(hateful_comments)
    count_comments = len(y_pred_svm)
    hate_ratio = count_hate / count_comments
    percentage = round(hate_ratio * 100, 2)
    first_hate = hateful_comments[["tweet", "proba"]][:hate]

    return [
        first_hate.to_dict('records'), count_hate, count_comments, percentage
    ]
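The function relies on module-level vectorizer, svm, and clean_tweets objects; assuming those are loaded, a call on a toy frame looks like this (hedged sketch, only the column name is taken from the code above):

import pandas as pd

comments = pd.DataFrame({"tweet": ["have a lovely day", "you are the worst"]})
first_hate, count_hate, count_comments, percentage = predict(comments, hate=5)
print("%d of %d comments flagged (%.2f%%)" % (count_hate, count_comments, percentage))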
Example #11
def testmodel(path):
    def getFeatVec(features, clf):
        featVec = np.zeros((1, 1000))
        res = clf.predict(features)
        for i in res:
            featVec[0][i] += 1
        return featVec

    def result(predict_y, test_y, num):
        print("###############################")
        right = 0
        res = []
        for i, tag in enumerate(test_y):
            if tag in predict_y[i][21 - num:21]:
                right += 1
                res.append(True)
            else:
                res.append(False)
        print("%d/%d = %f" % (right, len(test_y), right * 1.0 / len(test_y)))
        print("###############################")
        return res

    print('load model')
    svm = joblib.load("./05svm.model")
    clf = joblib.load("./05vocab.pkl")
    data, tags = loaddata(path)
    features, tags = extractfeature(data, tags)

    print('predict')
    test_x = np.float32([]).reshape(0, 1000)
    for feature in features:
        featVec = getFeatVec(feature, clf)
        test_x = np.append(test_x, featVec, axis=0)
    p = svm.predict_proba(test_x)
    p = p.argsort()
    res = result(p, tags, 5)
    return res
Example #12
def silence_removal(signal,
                    sampling_rate,
                    st_win,
                    st_step,
                    smooth_window=0.5,
                    weight=0.5,
                    plot=False):
    """
    Event Detection (silence removal)
    ARGUMENTS:
         - signal:                the input audio signal
         - sampling_rate:               sampling freq
         - st_win, st_step:    window size and step in seconds
         - smoothWindow:     (optinal) smooth window (in seconds)
         - weight:           (optinal) weight factor (0 < weight < 1)
                              the higher, the more strict
         - plot:             (optinal) True if results are to be plotted
    RETURNS:
         - seg_limits:    list of segment limits in seconds (e.g [[0.1, 0.9],
                          [1.4, 3.0]] means that
                          the resulting segments are (0.1 - 0.9) seconds
                          and (1.4, 3.0) seconds
    """

    if weight >= 1:
        weight = 0.99
    if weight <= 0:
        weight = 0.01

    # Step 1: feature extraction
    # signal = audioBasicIO.stereo_to_mono(signal)
    st_feats, _ = feature_extraction(signal, sampling_rate,
                                     st_win * sampling_rate,
                                     st_step * sampling_rate)

    # Step 2: train binary svm classifier of low vs high energy frames
    # keep only the energy short-term sequence (2nd feature)
    st_energy = st_feats[1, :]
    en = np.sort(st_energy)
    # number of 10% of the total short-term windows
    st_windows_fraction = int(len(en) / 10)

    # compute "lower" 10% energy threshold
    low_threshold = np.mean(en[0:st_windows_fraction]) + 1e-15

    # compute "higher" 10% energy threshold
    high_threshold = np.mean(en[-st_windows_fraction:-1]) + 1e-15

    # get all features that correspond to low energy
    low_energy = st_feats[:, np.where(st_energy <= low_threshold)[0]]

    # get all features that correspond to high energy
    high_energy = st_feats[:, np.where(st_energy >= high_threshold)[0]]

    # form the binary classification task and ...
    features = [low_energy.T, high_energy.T]
    # normalize and train the respective svm probabilistic model

    # (ONSET vs SILENCE)
    features_norm, mean, std = normalize_features(features)
    svm = train_svm(features_norm, 1.0)

    # Step 3: compute onset probability based on the trained svm
    prob_on_set = []
    for index in range(st_feats.shape[1]):
        # for each frame
        cur_fv = (st_feats[:, index] - mean) / std
        # get svm probability (that it belongs to the ONSET class)
        prob_on_set.append(svm.predict_proba(cur_fv.reshape(1, -1))[0][1])
    prob_on_set = np.array(prob_on_set)

    # smooth probability:
    prob_on_set = smooth_moving_avg(prob_on_set, smooth_window / st_step)

    # Step 4A: detect onset frame indices:
    prob_on_set_sort = np.sort(prob_on_set)

    # find the probability threshold as a weighted average
    # of the top 10% and bottom 10% of the values
    nt = int(prob_on_set_sort.shape[0] / 10)
    threshold = (np.mean((1 - weight) * prob_on_set_sort[0:nt]) +
                 weight * np.mean(prob_on_set_sort[-nt::]))

    max_indices = np.where(prob_on_set > threshold)[0]
    # get the indices of the frames that satisfy the thresholding
    index = 0
    seg_limits = []
    time_clusters = []

    # Step 4B: group frame indices to onset segments
    while index < len(max_indices):
        # for each of the detected onset indices
        cur_cluster = [max_indices[index]]
        if index == len(max_indices) - 1:
            break
        while max_indices[index + 1] - cur_cluster[-1] <= 2:
            cur_cluster.append(max_indices[index + 1])
            index += 1
            if index == len(max_indices) - 1:
                break
        index += 1
        time_clusters.append(cur_cluster)
        seg_limits.append(
            [cur_cluster[0] * st_step, cur_cluster[-1] * st_step])

    # Step 5: Post process: remove very small segments:
    min_duration = 0.2
    seg_limits_2 = []
    for s_lim in seg_limits:
        if s_lim[1] - s_lim[0] > min_duration:
            seg_limits_2.append(s_lim)

    return seg_limits_2
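A usage sketch following pyAudioAnalysis conventions; the I/O helper names are assumptions (older releases spell them readAudioFile / stereo2mono):

from pyAudioAnalysis import audioBasicIO

sampling_rate, signal = audioBasicIO.read_audio_file("speech.wav")
signal = audioBasicIO.stereo_to_mono(signal)
segments = silence_removal(signal, sampling_rate,
                           st_win=0.05, st_step=0.05,
                           smooth_window=0.5, weight=0.5)
print(segments)   # e.g. [[0.1, 0.9], [1.4, 3.0]]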
Example #13
def predictSVM(svm, x, y, x_test):
    print("[SVM] Testing model...")  # translated from Portuguese: "Testando modelo..."
    yPredito = svm.predict(x_test)
    return yPredito, svm.predict_proba(x_test)[:, 1]
Example #14
for col in cat:
    # fit one encoding over both splits so train/test codes stay consistent
    le.fit(pd.concat([train[col], test[col]]))
    train[col] = le.transform(train[col])
    test[col] = le.transform(test[col])

#no shirts, no shoes
train_X = train.drop(['year', 'oscar', 'movie_name', 'actor_name', 'href'], axis=1)
test_X = test.drop(['year', 'oscar', 'movie_name', 'actor_name', 'href'], axis=1)

train_Y = train['oscar']

#Fights will go on as long as they want to
svm = svm.SVC(kernel='rbf', C=1, probability=True).fit(train_X, train_Y)  # probability=True is required for predict_proba below
svm.score(train_X, train_Y)

#If this is your first night at Fight Club, you have to fight.
pred_svm = svm.predict_proba(test_X)[:, 1]
svm_prediction = pd.DataFrame(pred_svm, test['movie_name'])
Example #15
def predict_svm(svm, hog_window):
    """Return the confidence of classifying as a car."""
    return svm.predict_proba(hog_window.reshape(1, -1))[:, 1]
Example #16
datax = scaler.transform(datax_temp)

xgb = pickle.load(open("xgboost.dat", "rb"))
svm = pickle.load(open("svm.dat", "rb"))
lr = pickle.load(open("lr.dat", "rb"))
randomforest = pickle.load(open("randomforest.dat", "rb"))

xgb_pred = pd.DataFrame(xgb.predict_proba(datax)[:, 1])
rf_pred = pd.DataFrame(randomforest.predict_proba(datax)[:, 1])

temp = pd.concat([xgb_pred, rf_pred], axis=1)
temp['avg'] = temp.mean(axis=1)
combined_df = pd.concat(
    [pd.DataFrame(data_parent[['subject_id', 'datetime']]),
     pd.DataFrame(datax_temp),
     temp['avg']],
    axis=1)

combined_df['patient_category'] = combined_df.apply(f, axis=1)

# critical_patients = combined_df.loc[combined_df['patient_category'].isin(['very-critical', 'critical', 'moderate-critical'])]
combined_df.to_csv('critical_patients_records.csv')

xgb_prob = pd.DataFrame(xgb.predict_proba(datax))
svm_prob = pd.DataFrame(svm.predict_proba(datax))
lr_prob = pd.DataFrame(lr.predict_proba(datax))
rf_prob = pd.DataFrame(randomforest.predict_proba(datax))

final_df = pd.concat([xgb_prob, rf_prob, lr_prob, svm_prob], axis=1)
final_df.to_csv("prob_predictions_model-level.csv")
Example #17
lenzip = 0
count = 0
shapes = [triangle(width=img_size, height=img_size),
          rectangle(width=img_size, height=img_size),
          trapazoid(width=img_size, height=img_size),
          rhombus(width=img_size, height=img_size)]
for func, name in zip(shapes, ['triangle', 'rectangle', 'trapazoid', 'rhombus']):
    print(name)
    for shape in func:
        # FIXME: earlier fix attempt failed
        intersect_list = radial_intercepts(shape, img_size)
        intersect_list = np.asarray(scaler.transform(intersect_list))

        svm_prediction = svm.predict(intersect_list)
        if svm_prediction != name:
            prob = svm.predict_proba(intersect_list)
            svm_errors.append([name, svm_prediction, shape, prob, intersect_list])
        else:
            count += 1
# (the block below is commented out in the source)
# img = cv2.imread('/home/lie/Desktop/p/cut1.jpg')
#
# letter = cv2.Canny(img, 100, 200, apertureSize=3)
#
# contours, hierarchy = cv2.findContours(letter.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# cnts = []
# for cnt in contours:
#     cnt = cv2.approxPolyDP(cnt, .05, True)
#     if cv2.contourArea(cnt) > 2400:
#         cnts.append(cnt)
Example #18
test_est_p_ann = ann.predict_proba(x_test)[:, 1]
train_est_p_ann = ann.predict_proba(x_train)[:, 1]

fpr_test_ann, tpr_test_ann, th_test_ann=metrics.roc_curve(y_test,test_est_p_ann,pos_label=1)
fpr_train_ann, tpr_train_ann, th_train_ann=metrics.roc_curve(y_train,train_est_p_ann,pos_label=1)

# build an SVM model
from sklearn import svm
svm=svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
svm.fit(x_train,y_train)
test_est_svm = svm.predict(x_test)
train_est_svm = svm.predict(x_train)
test_est_p_svm = svm.predict_proba(x_test)[:,1]
train_est_p_svm = svm.predict_proba(x_train)[:,1]
fpr_test_svm, tpr_test_svm, th_test_svm=metrics.roc_curve(y_test,test_est_p_svm,pos_label=1)
fpr_train_svm, tpr_train_svm, th_train_svm=metrics.roc_curve(y_train,train_est_p_svm,pos_label=1)


# model with a random forest
import json
from operator import itemgetter
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import train_test_split,StratifiedShuffleSplit,StratifiedKFold
clf=RandomForestClassifier(n_estimators=500, criterion='entropy', max_depth=5, min_samples_split=1,
Example #19
def computeLikelihood(patch):
    X = hog.compute(patch).T
    prediction = svm.predict_proba(X)
    return prediction[0][1]
Example #20
dt.fit(X_train, y_train)
y_pred_dt = dt.predict_proba(X_test)[:, 1]
fpr_dt, tpr_dt, _ = roc_curve(y_test, y_pred_dt)
precision_dt, recall_dt, _ = precision_recall_curve(y_test, y_pred_dt)
roc_auc_dt = auc(fpr_dt, tpr_dt)

mlp = MLPClassifier()
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict_proba(X_test)[:, 1]
fpr_mlp, tpr_mlp, _ = roc_curve(y_test, y_pred_mlp)
precision_mlp, recall_mlp, _ = precision_recall_curve(y_test, y_pred_mlp)
roc_auc_mlp = auc(fpr_mlp, tpr_mlp)

svm = svm.SVC(probability=True)
svm.fit(X_train, y_train)
y_pred_svm = svm.predict_proba(X_test)[:, 1]
fpr_svm, tpr_svm, _ = roc_curve(y_test, y_pred_svm)
precision_svm, recall_svm, _ = precision_recall_curve(y_test, y_pred_svm)
roc_auc_svm = auc(fpr_svm, tpr_svm)

sgd = SGDClassifier(loss='log')
sgd.fit(X_train, y_train)
y_pred_sgd = sgd.predict_proba(X_test)[:, 1]
fpr_sgd, tpr_sgd, _ = roc_curve(y_test, y_pred_sgd)
precision_sgd, recall_sgd, _ = precision_recall_curve(y_test, y_pred_sgd)
roc_auc_sgd = auc(fpr_sgd, tpr_sgd)

gb = GaussianNB()
gb.fit(X_train, y_train)
y_pred_gb = gb.predict_proba(X_test)[:, 1]
fpr_gb, tpr_gb, _ = roc_curve(y_test, y_pred_gb)
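The excerpt stops after computing the per-model curves; a short follow-on sketch (the plotting itself is an assumption, not part of the original) overlays the four curves whose AUCs were computed above:

import matplotlib.pyplot as plt

curves = [("DT", fpr_dt, tpr_dt, roc_auc_dt),
          ("MLP", fpr_mlp, tpr_mlp, roc_auc_mlp),
          ("SVM", fpr_svm, tpr_svm, roc_auc_svm),
          ("SGD", fpr_sgd, tpr_sgd, roc_auc_sgd)]
for name, fpr, tpr, score in curves:
    plt.plot(fpr, tpr, label="%s (AUC = %.3f)" % (name, score))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.legend(loc=4)
plt.show()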
Example #21
Tree_proba1 = Tree_clf.predict_proba(X)

print("the f1 score of decision tree in the test data = ",
      f1_score(Y, Tree_pred1, average='weighted'))
print("the jaccard score of the decison tree in the test data  = ",
      jaccard_score(Y, Tree_pred1, average='weighted'))
print("the logloss for decision tree in the test datset = ",
      log_loss(Y, Tree_proba1))
f_lst.append(f1_score(Y, Tree_pred1, average='weighted'))
j_lst.append(jaccard_score(Y, Tree_pred1, average='weighted'))
l_lst.append(log_loss(Y, Tree_proba1))
n_lst.append('Descion Tree')

#SVM for the test data
SVM_pred1 = svm.predict(X)
SVM_proba = svm.predict_proba(X)

print("the f1 score for the svm in test data = ",
      f1_score(Y, SVM_pred1, average='weighted'))
print("the jaccard score for the svm in the test data =  ",
      jaccard_score(Y, SVM_pred1, average="weighted"))
print(" log loss for SVM in the test datset =  ", log_loss(Y, SVM_proba))
f_lst.append(f1_score(Y, SVM_pred1, average='weighted'))
j_lst.append(jaccard_score(Y, SVM_pred1, average='weighted'))
l_lst.append(log_loss(Y, SVM_proba))
n_lst.append('SVM')

# Final Report
Report = pd.DataFrame(columns=['Algorithm', 'Jaccard', 'F1-Score', 'LogLoss'])
Report['Algorithm'] = n_lst
Report['Jaccard'] = j_lst
Example #22
                                         max_depth=3,
                                         random_state=57)
bayes = GaussianNB(priors=[0.25, 0.25, 0.25, 0.25])
svm = svm.SVC(probability=True, C=0.01, gamma=1, random_state=1289)

last_clf.fit(meta_train_data, meta_train_true_labels)

forest.fit(train_input, train_data['true_class'])
bayes.fit(train_input, train_data['true_class'])
#bayes.fit(train_input_iid, train_data['true_class'])
svm.fit(train_input, train_data['true_class'])

test_proba_forest = forest.predict_proba(test_input)
test_proba_bayes = bayes.predict_proba(test_input)
#test_proba_bayes = bayes.predict_proba(test_input_iid)
test_proba_svm = svm.predict_proba(test_input)

test_proba = np.concatenate(
    (test_proba_forest, test_proba_bayes, test_proba_svm), axis=1)
# test_proba = np.concatenate((test_proba_forest, test_proba_svm), axis=1)
# test_proba = (test_proba_forest + test_proba_svm + test_proba_bayes)/3

pred = last_clf.predict(test_proba)

differ = abs(pred - test_data['true_class'])
accu = 1 - np.count_nonzero(differ) / test_data.shape[0]

print("Stacked accuracy:")
print(accu)

print("Stacked confusion matrix")
Example #23

def score(svm, datax, datay):
    return np.mean(svm.predict(datax) == datay)


print("Lineairement séparable")
# Lineairement separable avec un peu de bruit
datax, datay = gen_arti(nbex=1000, data_type=0, epsilon=1)
testx, testy = gen_arti(nbex=1000, data_type=0, epsilon=1)

# lineaire avec paramètres par défaut
svm = sklearn.svm.SVC(probability=True, kernel='linear')
svm.fit(datax, datay)

plot_frontiere_proba(datax, lambda x: svm.predict_proba(x)[:, 0], step=50)
plot_data(datax, datay)
plt.show()
print("Parametres par defaut : ", score(svm, testx, testy))

# lineaire avec C très fort
svm = sklearn.svm.SVC(probability=True, kernel='linear', C=99)
svm.fit(datax, datay)

plot_frontiere_proba(datax, lambda x: svm.predict_proba(x)[:, 0], step=50)
plot_data(datax, datay)
plt.show()
print("C fort : ", score(svm, testx, testy))

print("Non lineaire")
# Non-lineairement separable avec un peu de bruit
Example #25
                                     max_leaf_nodes=None,
                                     min_samples_leaf=1,
                                     min_samples_split=2,
                                     min_weight_fraction_leaf=0.0,
                                     oob_score=False,
                                     verbose=0,
                                     warm_start=False)
rf.fit(x_train, y_train)
rf_pred = rf.predict(x_test)
rf_prob = rf.predict_proba(x_test)

# SVM
svm = svm.SVC(gamma='scale', C=30000, probability=True)
svm.fit(x_train, y_train)
svm_pred = svm.predict(x_test)
svm_prob = svm.predict_proba(x_test)

# Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(x_train, y_train)
gnb_pred = gnb.predict(x_test)
gnb_prob = gnb.predict_proba(x_test)

# Quadratic Discriminant Analysis
qda = QuadraticDiscriminantAnalysis()
qda = qda.fit(x_train, y_train)
qda_pred = qda.predict(x_test)
qda_prob = qda.predict_proba(x_test)

# Gaussian Process
#gp = GaussianProcessClassifier(1.0 * RBF(1.0))
Example #26
        df_data['collection'],
        test_size=0.1,
        random_state=1)

    # vetorize
    vectorizer = TfidfVectorizer()
    X_train_v = vectorizer.fit_transform(X_train)

    # svm
    svm = svm.SVC(C=1000, gamma='auto', probability=True)
    svm.fit(X_train_v, y_train)

    # test model
    X_test_v = vectorizer.transform(X_test)
    y_pred = svm.predict(X_test_v)
    y_pred_proba = svm.predict_proba(X_test_v)  # ham probability

    # summary
    pp.pprint(confusion_matrix(y_test, y_pred))
    score = svm.score(X_test_v, y_test)
    print(score)  # 98%

    # save results
    df_results = pd.DataFrame.from_dict({
        'X_test':
        X_test,
        'y_test':
        y_test,
        'y_pred':
        y_pred,
        'y_pred_proba':
Example #27
    output_class = np.array(classif.predict(prediction_features))

    return output_class


#%%
'''Cell 5: Train and Evaluate model on Test'''

y_pred = train_and_predict(X_train, y_train, X_test)
if y_pred is not None:
    print(metrics.accuracy_score(y_test, y_pred))

#%%
'''Cell 6: ROC Curve, AUC, Confusion Matrix'''
# predict probabilities for X_test using predict_proba
probabilities = svm.predict_proba(X_test)

# select the probabilities for label 1.0
y_proba = probabilities[:, 1]

# calculate false positive rate and true positive rate at different thresholds
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test,
                                                                y_proba,
                                                                pos_label=1)

# calculate AUC
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
# plot the false positive rate on the x axis and the true positive rate on the y axis
roc_plot = plt.plot(false_positive_rate,
Example #28
# logo = StratifiedKFold(n_splits=10)

fold = 1
for train_index, val_index in logo.split(train_input, train_true_class,
                                         train_group_id):
    X_train, X_test = train_input[train_index], train_input[val_index]
    y_train, y_test = train_true_class[train_index], train_true_class[
        val_index]
    # fit base classifiers on training data
    # and get predictions for test data
    forest.fit(X_train, y_train)
    pred_f = forest.predict_proba(X_test)
    bayes.fit(X_train, y_train)
    pred_b = bayes.predict_proba(X_test)
    svm.fit(X_train, y_train)
    pred_s = svm.predict_proba(X_test)
    #
    pred_features = np.concatenate((pred_f, pred_b, pred_s), axis=1)
    # pred_features = np.concatenate((pred_f, pred_s), axis=1)
    # pred_features = (pred_f + pred_s + pred_b)/3
    train_pred_X.append(pred_features)
    train_true_labels.append(y_test)
    train_groups.append(train_group_id[val_index])
    print("Fold " + str(fold) + "done")
    fold = fold + 1

train_level_one_data = np.concatenate(train_pred_X, axis=0)
train_level_one_labels = np.concatenate(train_true_labels)
train_level_one_groups = np.concatenate(train_groups)

with open('..\saves\meta_train_data_mixed.pickle', 'wb') as f:
Example #29
    results_rect = []
    results_label = []
    results_proba = []
    results_feature = []
    for featureIndex, feature in enumerate(features):

        # init decision of current bounding box
        maxProba = -1
        labelInMaxProba = 0
        rectInMaxProba = []

        for svmIndex, svm in enumerate(svms):
            svmClassLabel = svmIndex + 1
            print("---> svm index :", svmClassLabel)
            pred = svm.predict_proba([feature.tolist()])
            probaArr = pred[0]
            # not background
            if (probaArr[0] < probaArr[1]):
                print(" +++ detect object in this childImg.")
                print("proba :", probaArr[1])
                if (probaArr[1] > maxProba):
                    print("larger probability appear in this svm model.")
                    maxProba = probaArr[1]
                    labelInMaxProba = svmClassLabel
                    rectInMaxProba = verts[featureIndex]
        # use predict result on max probability as the final result
        if (maxProba > 0):
            results_label.append(labelInMaxProba)
            results_rect.append(rectInMaxProba)
            results_proba.append(maxProba)
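The loop assumes svms is a list of one-vs-background binary models, where predict_proba column 0 is background and column 1 the object class; a hedged sketch of building such a list (toy features, illustrative class count):

import numpy as np
from sklearn.svm import SVC

X = np.random.rand(120, 512)                 # toy region features
y = np.random.randint(0, 4, 120)             # 0 = background, 1..3 = object classes

svms = []
for cls in (1, 2, 3):
    labels = (y == cls).astype(int)           # class-vs-everything-else task
    svms.append(SVC(probability=True).fit(X, labels))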
Example #30
    lr.fit(X_train1, y_train1)
    y_pred1 = lr.predict(X_test1)

    print('roc_auc_score:', metrics.roc_auc_score(y_test1, y_pred1))

    y_predict_probabilities1 = lr.predict_proba(X_test1)[:, 1]
    fpr1, tpr1, _ = roc_curve(y_test1, y_predict_probabilities1)
    roc_auc1 = auc(fpr1, tpr1)

    svm = svm.SVC(kernel='linear', C=10, probability=True)
    svm.fit(X_train1, y_train1)
    y_pred2 = svm.predict(X_test1)

    print('roc_auc_score:', metrics.roc_auc_score(y_test1, y_pred2))

    y_predict_probabilities2 = svm.predict_proba(X_test1)[:,1]
    fpr2, tpr2, _ = roc_curve(y_test1, y_predict_probabilities2)
    roc_auc2 = auc(fpr2, tpr2)

data2 = pd.read_csv('English (unnormalized).csv')
y2 = data2['y']
x2 = data2.drop(labels=['y'], axis=1)
cv2 = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1)

for train_index, test_index in cv2.split(x2, y2):
    X_train2, X_test2, y_train2, y_test2 = x2.iloc[train_index], x2.iloc[test_index], y2.iloc[train_index], y2.iloc[test_index]

    dt = DecisionTreeClassifier(random_state=10,max_depth=5, min_samples_leaf=1,max_features=0.8)
    dt.fit(X_train2, y_train2)
    y_pred3 = dt.predict(X_test2)
Example #31
    'time': time_list
})

resultDf['trueMove'] = resultDf['trueMove'].astype(int)
resultDf['equal'] = (resultDf.predictionSVM == resultDf.trueMove.astype(int))

print('--------------Plot ROC-AUC --------------')
from sklearn import metrics
print("NB Accuracy", metrics.accuracy_score(resultDf.trueMove, pred_list_NB))

plt.figure(figsize=(9, 7))

y_pred_proba = nb.predict_proba(X_test_array)[:, 1]
# roc_curve expects the true labels, not the model's own predictions
fpr, tpr, _ = metrics.roc_curve(resultDf.trueMove, y_pred_proba)
auc = metrics.roc_auc_score(resultDf.trueMove, y_pred_proba)
plt.plot(fpr, tpr, label="NB auc=" + str('% 6.3f' % auc))

y_pred_proba2 = knn.predict_proba(X_test_array)[:, 1]
fpr, tpr, _ = metrics.roc_curve(resultDf.trueMove, y_pred_proba2)
auc2 = metrics.roc_auc_score(resultDf.trueMove, y_pred_proba2)
plt.plot(fpr, tpr, label="KNN auc=" + str('% 6.3f' % auc2))

y_pred_proba3 = svm.predict_proba(X_test_array)[:, 1]
fpr, tpr, _ = metrics.roc_curve(resultDf.trueMove, y_pred_proba3)
auc3 = metrics.roc_auc_score(resultDf.trueMove, y_pred_proba3)
plt.plot(fpr, tpr, label="SVM auc=" + str('% 6.3f' % auc3))

plt.xlabel("false positive")
plt.ylabel("true positive")
plt.legend(loc=4)
plt.show()
Example #32
svm.fit(X_train, y_train)

#Save SVM object
pickle_dump(svm, path='./checkpoints/SVM_Model')

################### Evaluate Model ###################
# generate predictions
y_pred = svm.predict(X_valid)

# calculate accuracy
accuracy = accuracy_score(Y_valid, y_pred)
print('Model accuracy is: ', accuracy)

###################  ROC curve & AUC ###################
# predict probabilities for X_test using predict_proba
probabilities = svm.predict_proba(X_valid)

# select the probabilities for label 1.0
y_proba = probabilities[:, 1]

# calculate false positive rate and true positive rate at different thresholds
false_positive_rate, true_positive_rate, thresholds = roc_curve(Y_valid,
                                                                y_proba,
                                                                pos_label=1)

# calculate AUC
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.title('Receiver Operating Characteristic')
# plot the false positive rate on the x axis and the true positive rate on the y axis
roc_plot = plt.plot(false_positive_rate,