Ejemplo n.º 1
0
def model_lda(train, test, label):
    """Fit a linear discriminant analysis classifier and score the test set.

    The classifier is trained on the ``label`` columns of ``train`` against
    ``train['hotel_cluster']``; class probabilities are then computed for
    the same columns of ``test``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    classifier = discriminant_analysis.LinearDiscriminantAnalysis()

    features = train[label]
    target = train['hotel_cluster']
    classifier.fit(features, target)

    probabilities = classifier.predict_proba(test[label])
    # util.best_proba post-processes the probability matrix
    # (presumably picks the most likely clusters -- confirm in util).
    return util.best_proba(probabilities), classifier
Ejemplo n.º 2
0
def model_knn(train, test, label):
    """Fit a 30-nearest-neighbours classifier and score the test set.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` and
    computes class probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    classifier = KNeighborsClassifier(n_neighbors=30)

    features = train[label]
    target = train['hotel_cluster']
    classifier.fit(features, target)

    probabilities = classifier.predict_proba(test[label])
    return util.best_proba(probabilities), classifier
Ejemplo n.º 3
0
def model_reglog(train, test, label):
    """Fit a default-configured logistic regression and score the test set.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` and
    computes class probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    classifier = linear_model.LogisticRegression()

    features = train[label]
    target = train['hotel_cluster']
    classifier.fit(features, target)

    probabilities = classifier.predict_proba(test[label])
    return util.best_proba(probabilities), classifier
Ejemplo n.º 4
0
def model_lasso(train, test, label):
    """Fit an L1-regularised (lasso-style) logistic regression.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` with
    inverse regularisation strength ``C = 0.01`` and computes class
    probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    C = 0.01
    # The lasso is the L1 penalty; the previous "l2" setting made this a
    # ridge-style model despite the function name. The solver is spelled out
    # because not every LogisticRegression solver accepts penalty="l1".
    # NOTE(review): on recent scikit-learn releases solver="saga" may be
    # preferable for multiclass targets -- confirm against the pinned version.
    lasso = linear_model.LogisticRegression(C=C, penalty="l1",
                                            solver="liblinear")
    lasso.fit(train[label], train['hotel_cluster'])

    prediction = lasso.predict_proba(test[label])

    return util.best_proba(prediction), lasso
Ejemplo n.º 5
0
def model_rforest(train, test, label):
    """Fit a 10-tree Gini random forest and score the test set.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` and
    computes class probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_forest)``.
    """
    forest = ske.RandomForestClassifier(n_estimators=10, criterion="gini")

    features = train[label]
    target = train['hotel_cluster']
    forest.fit(features, target)

    probabilities = forest.predict_proba(test[label])
    return util.best_proba(probabilities), forest
Ejemplo n.º 6
0
def model_SVM(train, test, label):
    """Fit an RBF-kernel support vector classifier and score the test set.

    The classifier is created with ``probability=True`` so that
    ``predict_proba`` is available. It is trained on ``train[label]``
    against ``train['hotel_cluster']`` and evaluated on ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    classifier = svm.SVC(kernel='rbf', probability=True)

    features = train[label]
    target = train['hotel_cluster']
    classifier.fit(features, target)

    probabilities = classifier.predict_proba(test[label])
    return util.best_proba(probabilities), classifier
Ejemplo n.º 7
0
def model_labelprop(train, test, label):
    """Fit an RBF-kernel label propagation model and score the test set.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` and
    computes class probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_model)``.
    """
    propagator = sm.LabelPropagation(kernel='rbf')

    features = train[label]
    target = train['hotel_cluster']
    propagator.fit(features, target)

    probabilities = propagator.predict_proba(test[label])
    return util.best_proba(probabilities), propagator
Ejemplo n.º 8
0
def model_adaboost(train, test, label):
    """Fit an AdaBoost classifier and score the test set.

    Uses 100 estimators with a learning rate of 0.1. Trains on
    ``train[label]`` against ``train['hotel_cluster']`` and computes class
    probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    booster = AdaBoostClassifier(learning_rate=0.1, n_estimators=100)

    features = train[label]
    target = train['hotel_cluster']
    booster.fit(features, target)

    probabilities = booster.predict_proba(test[label])
    return util.best_proba(probabilities), booster
Ejemplo n.º 9
0
def model_dec_tree(train, test, label):
    """Fit a single decision tree and score the test set.

    The tree uses the Gini criterion with the 'best' splitter and no limit
    on depth or on the number of features considered. It is trained on
    ``train[label]`` against ``train['hotel_cluster']`` and evaluated on
    ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_tree)``.
    """
    tree_options = {
        'criterion': 'gini',
        'splitter': 'best',
        'max_depth': None,
        'max_features': None,
    }
    tree = DecisionTreeClassifier(**tree_options)

    features = train[label]
    target = train["hotel_cluster"]
    tree.fit(features, target)

    probabilities = tree.predict_proba(test[label])
    return util.best_proba(probabilities), tree
Ejemplo n.º 10
0
def model_gradboost(train, test, label):
    """Fit a 10-estimator gradient boosting classifier and score the test set.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` and
    computes class probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_classifier)``.
    """
    booster = GradientBoostingClassifier(n_estimators=10)

    features = train[label]
    target = train['hotel_cluster']
    booster.fit(features, target)

    probabilities = booster.predict_proba(test[label])
    return util.best_proba(probabilities), booster
Ejemplo n.º 11
0
def model_xgb(train, test, label):
    """Fit a 10-estimator XGBoost classifier and score the test set.

    Trains on ``train[label]`` against ``train['hotel_cluster']`` using four
    threads and computes class probabilities for ``test[label]``.

    Returns a tuple ``(util.best_proba(probability_matrix), fitted_classifier)``.
    """
    # Local is not called ``xgb`` so it cannot shadow a module alias of the
    # same name inside this function.
    classifier = sklearn.XGBClassifier(nthread=4, n_estimators=10)

    classifier.fit(train[label], train['hotel_cluster'])

    prediction = classifier.predict_proba(test[label])

    # The probabilities are transposed and only the last ``n_test_rows`` rows
    # of the transposed frame are kept before ranking.
    # NOTE(review): this reshaping is preserved from the original; it looks
    # like a workaround for a specific predict_proba output layout -- confirm.
    n_test_rows = test[label].shape[0]
    df = pd.DataFrame(prediction).transpose().tail(n_test_rows)

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` returns
    # the same ndarray and is available on old and new pandas alike.
    return util.best_proba(df.values), classifier
Ejemplo n.º 12
0
def error(weight, matrix_pred, test):
    """Return ``1 - util.mapk`` for a weighted blend of probability matrices.

    Parameters
    ----------
    weight : sequence of numbers
        One multiplier per entry of ``matrix_pred``.
    matrix_pred : sequence of arrays
        Probability matrices to blend; each is multiplied element-wise by
        its weight and the products are summed.
    test : mapping with a ``'hotel_cluster'`` entry
        Source of the true labels, one per row.

    Returns
    -------
    ``1 - util.mapk(actual, clusters)`` where ``actual`` wraps every true
    label in a one-element list and ``clusters`` is
    ``util.best_proba`` applied to the blended matrix.
    """
    # Build the products eagerly: on Python 3 ``map`` is lazy and ``np.sum``
    # cannot reduce a map object along an axis.
    weighted = [np.multiply(w, pred) for w, pred in zip(weight, matrix_pred)]
    prediction_final = np.sum(weighted, axis=0)

    clusters = util.best_proba(prediction_final)

    # util.mapk is given each true label as a single-item list. A
    # comprehension replaces the former map()-for-side-effects into a
    # ``range`` object, which only worked on Python 2.
    actual = [[val] for val in test['hotel_cluster']]

    return 1 - util.mapk(actual, clusters)
Ejemplo n.º 13
0
def model_neural(train, test, label):
    """Train a small fully connected network and score the test set.

    The network stacks three dense layers (12 units, ``len(label)`` units,
    1 sigmoid unit), is compiled with binary cross-entropy and the Adam
    optimizer, and is trained for 150 epochs with a batch size of 10 on
    ``train[label]`` against ``train['hotel_cluster']``.

    Returns ``util.best_proba(probabilities)`` only; unlike the other model
    helpers in this file, the fitted network is not returned.
    """
    n_inputs = len(label)

    # NOTE(review): a single sigmoid output with binary cross-entropy
    # presumes a binary target -- confirm this matches 'hotel_cluster'.
    layers = (
        Dense(12, input_dim=n_inputs, init='uniform', activation='relu'),
        Dense(n_inputs, init='uniform', activation='relu'),
        Dense(1, init='uniform', activation='sigmoid'),
    )

    network = Sequential()
    for layer in layers:
        network.add(layer)

    network.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

    features = train[label]
    target = train['hotel_cluster']
    network.fit(features, target, nb_epoch=150, batch_size=10)

    probabilities = network.predict_proba(test[label])
    return util.best_proba(probabilities)
Ejemplo n.º 14
0
def model_bagging(train, test, label, grid_elt=None):
    """Fit a bagging ensemble of k-nearest-neighbour classifiers.

    Parameters
    ----------
    train, test : tabular data indexable by column
        ``train`` must also contain ``'hotel_cluster'``.
    label : column selector
        Feature columns used for fitting and prediction.
    grid_elt : indexable or None
        When given, ``grid_elt[0]``, ``grid_elt[1]`` and ``grid_elt[2]`` are
        used as ``n_estimators``, ``max_samples`` and ``max_features``.
        When ``None`` the values 30, 1.0 and 0.1 are used.

    Returns a tuple ``(util.best_proba(probabilities), fitted_ensemble)``.
    """
    if grid_elt is None:
        n_estimators, max_samples, max_features = 30, 1.0, 0.1
    else:
        n_estimators = grid_elt[0]
        max_samples = grid_elt[1]
        max_features = grid_elt[2]

    bagger = ensemble.BaggingClassifier(KNeighborsClassifier(),
                                        n_jobs=1,
                                        n_estimators=n_estimators,
                                        max_samples=max_samples,
                                        max_features=max_features)

    features = train[label]
    target = train['hotel_cluster']
    bagger.fit(features, target)

    probabilities = bagger.predict_proba(test[label])
    return util.best_proba(probabilities), bagger
Ejemplo n.º 15
0
def model_voting(train, test, label, grid_elt=None):
    """Fit a soft-voting ensemble of three classifiers.

    The members are a logistic regression (``random_state=1``), a
    10-estimator gradient boosting classifier and a 30-nearest-neighbours
    classifier.

    Parameters
    ----------
    train, test : tabular data indexable by column
        ``train`` must also contain ``'hotel_cluster'``.
    label : column selector
        Feature columns used for fitting and prediction.
    grid_elt : sequence or None
        Voting weights passed straight to ``VotingClassifier``; ``None``
        selects equal weights ``[1, 1, 1]``.

    Returns a tuple ``(util.best_proba(probabilities), fitted_ensemble)``.
    """
    members = [
        ('lr', LogisticRegression(random_state=1)),
        ('gb', GradientBoostingClassifier(n_estimators=10)),
        ('knn', KNeighborsClassifier(n_neighbors=30)),
    ]

    vote_weights = [1, 1, 1] if grid_elt is None else grid_elt

    voter = VotingClassifier(estimators=members,
                             voting='soft',
                             weights=vote_weights)

    features = train[label]
    target = train['hotel_cluster']
    voter.fit(features, target)

    probabilities = voter.predict_proba(test[label])
    return util.best_proba(probabilities), voter
Ejemplo n.º 16
0
def model_weighted(train, test, label, weight=None):
    """Blend three classifiers' probabilities with per-model weights.

    A logistic regression, a 10-estimator gradient boosting classifier and
    a 10-nearest-neighbours classifier are each fitted on ``train[label]``
    against ``train['hotel_cluster']``. Their ``predict_proba`` outputs for
    ``test[label]`` are multiplied by the matching weight and summed.

    Parameters
    ----------
    train, test : tabular data indexable by column
        ``train`` must also contain ``'hotel_cluster'``.
    label : column selector
        Feature columns used for fitting and prediction.
    weight : sequence of three numbers or None
        Multipliers for the logistic regression, gradient boosting and
        k-NN probabilities, in that order. ``None`` (the default) weights
        the three models equally.

    Returns
    -------
    ``util.best_proba`` applied to the blended probability matrix. The
    fitted estimators are not returned.

    Raises
    ------
    ValueError
        If ``weight`` does not contain exactly one entry per model.
    """
    estimators = [
        linear_model.LogisticRegression(),
        GradientBoostingClassifier(n_estimators=10),
        KNeighborsClassifier(n_neighbors=10),
    ]

    # The weight list used to be hard-coded empty, which left nothing to
    # blend; fall back to a uniform blend instead.
    if weight is None:
        weight = [1.0 / len(estimators)] * len(estimators)
    elif len(weight) != len(estimators):
        # Checked before fitting so a bad call fails fast.
        raise ValueError(
            "weight must contain %d values, got %d"
            % (len(estimators), len(weight)))

    features = train[label]
    target = train['hotel_cluster']

    matrix_pred = []
    for estimator in estimators:
        estimator.fit(features, target)
        matrix_pred.append(estimator.predict_proba(test[label]))

    # Eager list rather than map(): on Python 3 ``map`` is lazy and
    # ``np.sum`` cannot reduce a map object along an axis.
    weighted = [np.multiply(w, pred) for w, pred in zip(weight, matrix_pred)]
    prediction_final = np.sum(weighted, axis=0)

    return util.best_proba(prediction_final)