Example #1
def main(train_f, test_f, param):

    # prepare X, Y for training
    logging.info("loading training data")
    X, Y = load_data(train_f)

    logging.info("training using liblinear")
    model = liblinear_train(Y, X, param)

    logging.info('build nbsvm model')
    w, b = compute_NBSVM_param(model, param['beta'])

    logging.info("loading testing data")
    X, Y = load_data(test_f)

    logging.info("predict using nbsvm")
    p_labels = NBSVM_predict(w, b, X)
    positive_prf, negative_prf = calc_prf(Y, p_labels)
    positive_prf = map(lambda x: 100 * x, positive_prf)
    negative_prf = map(lambda x: 100 * x, negative_prf)
    acc = calc_acc(Y, p_labels)
    print "accuracy = %.2f %%" % (acc)
    print "positive class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        positive_prf[0], positive_prf[1], positive_prf[2])
    print "negative class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        negative_prf[0], negative_prf[1], negative_prf[2])
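compute_NBSVM_param and NBSVM_predict come from elsewhere in this project and are not shown on this page. NBSVM (Wang and Manning, 2012) interpolates naive-Bayes-scaled weights into the SVM weight vector via the beta parameter. For reference only, a minimal sketch of a compatible NBSVM_predict, assuming w is a dict mapping feature index to interpolated weight, b is a scalar bias, and X is a list of LIBLINEAR-style sparse feature dicts (all of these are assumptions, not the project's actual code):

# Hypothetical sketch -- not the project's actual implementation.
def NBSVM_predict(w, b, X):
    # w: {feature_index: weight} (assumed), b: scalar bias,
    # X: list of {feature_index: value} sparse instances (assumed)
    p_labels = []
    for x in X:
        score = b + sum(w.get(i, 0.0) * v for i, v in x.items())
        p_labels.append(1 if score >= 0 else -1)
    return p_labels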
Example #2
def main(train_f, test_f, param):

    # prepare X, Y for training
    logging.info("loading training data")
    X, Y = load_data(train_f)

    logging.info("training using liblinear")
    model = liblinear_train(Y, X, param)

    logging.info("loading testing data")
    X, Y = load_data(test_f)

    logging.info("predict using liblinear")
    p_labels, p_acc, p_val = liblinearutil.predict(Y, X, model, "-q")
    positive_prf, negative_prf = calc_prf(Y, p_labels)
    positive_prf = map(lambda x: 100 * x, positive_prf)
    negative_prf = map(lambda x: 100 * x, negative_prf)
    print "accuracy = %.2f %%" % (p_acc[0])
    print "positive class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        positive_prf[0], positive_prf[1], positive_prf[2])
    print "negative class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        negative_prf[0], negative_prf[1], negative_prf[2])
Example #3
def main(ifi, ofi, selected_label, ratio):
    logging.info('loading data ...')
    X, Y = load_data(ifi)
    instance_num = len(Y)
    labels = Counter(Y)
    if selected_label not in labels:
        print 'invalid label: %d' % (selected_label)
        print 'valid labels are %s' % (labels.keys())
        return -1
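    # Hypothetical continuation -- the original listing is cut off above.
    # Judging from the signature (output file plus ratio), the function
    # presumably keeps only a `ratio` fraction of the selected label's
    # instances and writes the result to ofi; this is an assumption.
    keep_num = int(labels[selected_label] * ratio)
    kept = 0
    with open(ofi, 'w') as of:
        for x, y in zip(X, Y):
            if y == selected_label:
                if kept >= keep_num:
                    continue
                kept += 1
            of.write('%d\t%s\n' % (y, x))  # assumed output format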
from collections import OrderedDict
from math import sqrt

from file_handler import load_data, load_info, load_item

movies = load_item()
ratings = load_data()


def find_movie_by_title(title):
    index_title = 1
    title_right_index = -7  # strip the trailing " (YYYY)" year suffix
    for movie in movies:
        # compare titles with the year suffix trimmed off
        if movie[index_title][:title_right_index] == title:
            return movie

    return None


def get_movie_id_by_title(title):
    movie = find_movie_by_title(title)
    index_movie_id = 0
    if movie is not None:
        return movie[index_movie_id]
        # return int(movie[index_movie_id])
    else:
        return None


def get_jaccard_coefficient(first_movie_id, second_movie_id):
    # The original body is cut off in this listing; the completion below is a
    # hypothetical sketch. It assumes each row of `ratings` looks like
    # [user_id, movie_id, rating, ...], as in the MovieLens u.data format.
    index_user_id, index_movie_id = 0, 1
    first_users = set(r[index_user_id] for r in ratings
                      if r[index_movie_id] == first_movie_id)
    second_users = set(r[index_user_id] for r in ratings
                       if r[index_movie_id] == second_movie_id)
    union = first_users | second_users
    if not union:
        return 0.0
    # Jaccard coefficient: |A intersect B| / |A union B|
    return len(first_users & second_users) / float(len(union))
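
# Hypothetical usage of the helpers above -- the title and the movie ids
# are assumed examples, not taken from the project.
if __name__ == '__main__':
    print(get_movie_id_by_title('Toy Story'))
    print(get_jaccard_coefficient('1', '2'))  # assumed movie ids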
import pandas as pd
import numpy as np
import xgboost as xgb
import utils
import file_handler as fh

"""
Script to benchmark the performances of XGBoost
Launches cross validations of the model, we can choose some
parameters to make vary in order to compare performances and
tune the parameters values
"""

train = fh.load_data('train')

# Data transformation
y = train.QuoteConversion_Flag.values
train = train.drop(['QuoteNumber', 'QuoteConversion_Flag'], axis=1)
train = utils.transform_dates(train)
train = train.fillna(-1)
train = utils.transform_categorical_features_train(train)
data_dm = xgb.DMatrix(train.values, y)  

# base values for parameters
xgb_base_parameters = {
    'nthread': -1,
    'n_estimators': 25,
    'max_depth': 15,
    'learning_rate': 0.025,
    'silent': True,
    'subsample': 1,
    # remaining parameters are cut off in the source listing
}
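
The benchmark loop itself is cut off above. A parameter sweep of the kind the docstring describes could look like the following sketch (not from the source; it uses native-API parameter names such as eta, and assumes a reasonably recent XGBoost where xgb.cv returns a pandas DataFrame):

# Hypothetical sketch of a cross-validated parameter sweep -- not the source's code.
for depth in (5, 10, 15):
    params = {'objective': 'binary:logistic', 'eta': 0.025,
              'max_depth': depth, 'silent': 1}
    cv = xgb.cv(params, data_dm, num_boost_round=50, nfold=5,
                metrics='auc', seed=0)
    print(depth, float(cv['test-auc-mean'].iloc[-1]))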
Example #9
import xgboost as xgb
import utils
import file_handler as fh

"""
Script that loads the models from models folder, performs the prediction on the test
dataset and output the result in results folder
"""

knn_features = ['PropertyField37','SalesField5','PersonalField9','Field7','PersonalField2',
'PersonalField1','SalesField4','PersonalField10A','SalesField1B', 'PersonalField10B',
'PersonalField12']

if __name__ == "__main__":
	# load data
	train = fh.load_data('train')
	test = fh.load_data('test')

	# transform data 
	Y_train = train.QuoteConversion_Flag.values
	train = train.drop(['QuoteNumber', 'QuoteConversion_Flag'], axis=1)
	test = test.drop('QuoteNumber', axis=1)
	train = utils.transform_dates(train)
	test = utils.transform_dates(test)
	train = train.fillna(-1)
	test = test.fillna(-1)
	train, test = utils.transform_categorical_features_test_train(train, test)

	# transform data for knn
	knn_train = train.loc[:, knn_features]
	knn_test = test.loc[:, knn_features]
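	# Hypothetical continuation -- the listing is cut off here. The docstring
	# promises loading saved models and writing predictions; assuming an
	# XGBoost booster was saved at models/xgb.model (assumed path):
	bst = xgb.Booster()
	bst.load_model('models/xgb.model')
	predictions = bst.predict(xgb.DMatrix(test.values))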