def main(train_f, test_f, param):
    """Train with liblinear, derive NBSVM parameters, and print test metrics.

    Args:
        train_f: Training data source, handed to ``load_data``.
        test_f: Test data source, handed to ``load_data``.
        param: Options handed to ``liblinear_train``; ``param['beta']`` is
            additionally read and passed to ``compute_NBSVM_param``.

    Returns:
        None. Accuracy and the per-class p/r/f values are printed.
    """
    # Fit the base linear model on the training split.
    logging.info("loading trainning data")
    train_x, train_y = load_data(train_f)
    logging.info("training using liblinear")
    model = liblinear_train(train_y, train_x, param)

    # Turn the trained model into the (w, b) pair used for NBSVM prediction.
    logging.info('build nbsvm model')
    w, b = compute_NBSVM_param(model, param['beta'])

    logging.info("loading testing data")
    test_x, test_y = load_data(test_f)
    logging.info("predict using nbsvm")
    p_labels = NBSVM_predict(w, b, test_x)

    # Scale the per-class scores by 100 so they print as percentages.
    positive_prf, negative_prf = calc_prf(test_y, p_labels)
    positive_prf = [100 * score for score in positive_prf]
    negative_prf = [100 * score for score in negative_prf]

    # Printed exactly as returned by calc_acc (no x100 scaling here) --
    # presumably already a percentage; TODO confirm.
    acc = calc_acc(test_y, p_labels)
    print("accuracy = %.2f %%" % (acc))
    print("positive class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        positive_prf[0], positive_prf[1], positive_prf[2]))
    print("negative class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        negative_prf[0], negative_prf[1], negative_prf[2]))
def main(train_f, test_f, param):
    """Train a liblinear model on ``train_f`` and print its metrics on ``test_f``.

    Args:
        train_f: Training data source, handed to ``load_data``.
        test_f: Test data source, handed to ``load_data``.
        param: Options handed to ``liblinear_train``.

    Returns:
        None. Accuracy and the per-class p/r/f values are printed.
    """
    # Fit the model on the training split.
    logging.info("loading trainning data")
    train_x, train_y = load_data(train_f)
    logging.info("training using liblinear")
    model = liblinear_train(train_y, train_x, param)

    logging.info("ready testing data")
    test_x, test_y = load_data(test_f)

    # "-q" is forwarded to liblinearutil.predict; the third returned value
    # is not used by this function.
    logging.info("predict using liblinear")
    p_labels, p_acc, _p_val = liblinearutil.predict(test_y, test_x, model, "-q")

    # Scale the per-class scores by 100 so they print as percentages.
    positive_prf, negative_prf = calc_prf(test_y, p_labels)
    positive_prf = [100 * score for score in positive_prf]
    negative_prf = [100 * score for score in negative_prf]

    # The first entry of p_acc is reported as the accuracy, unscaled.
    print("accuracy = %.2f %%" % (p_acc[0]))
    print("positive class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        positive_prf[0], positive_prf[1], positive_prf[2]))
    print("negative class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        negative_prf[0], negative_prf[1], negative_prf[2]))
def main(ifi, ofi, selected_label, ratio):
    """Load the data from ``ifi`` and validate ``selected_label`` against it.

    Args:
        ifi: Input data source, handed to ``load_data``.
        ofi: Not used in the visible portion of this function.
        selected_label: Label that must occur among the loaded labels.
        ratio: Not used in the visible portion of this function.

    Returns:
        -1 after printing a diagnostic when ``selected_label`` does not occur
        in the loaded labels; otherwise the visible code falls through.
    """
    logging.info('loading data ...')
    X, Y = load_data(ifi)
    instance_num = len(Y)
    labels = Counter(Y)

    # Explicit membership test instead of `assert` inside try/except:
    # assertions are stripped when Python runs with -O, which would silently
    # disable this validation.
    if selected_label not in labels:
        print('Not valid label of %d' % (selected_label))
        print('valid labels should be %s' % (labels.keys()))
        return -1
def main(ifi, ofi, selected_label, ratio):
    """Load the data from ``ifi`` and validate ``selected_label`` against it.

    Args:
        ifi: Input data source, handed to ``load_data``.
        ofi: Not used in the visible portion of this function.
        selected_label: Label that must occur among the loaded labels.
        ratio: Not used in the visible portion of this function.

    Returns:
        -1 after printing a diagnostic when ``selected_label`` does not occur
        in the loaded labels; otherwise the visible code falls through.
    """
    logging.info('loading data ...')
    X, Y = load_data(ifi)
    instance_num = len(Y)
    labels = Counter(Y)

    # Explicit membership test instead of `assert` inside try/except:
    # assertions are stripped when Python runs with -O, which would silently
    # disable this validation.
    if selected_label not in labels:
        print('Not valid label of %d' % (selected_label))
        print('valid labels should be %s' % (labels.keys()))
        return -1
def main(train_f, test_f, param):
    """Train a liblinear model on ``train_f`` and print its metrics on ``test_f``.

    Args:
        train_f: Training data source, handed to ``load_data``.
        test_f: Test data source, handed to ``load_data``.
        param: Options handed to ``liblinear_train``.

    Returns:
        None. Accuracy and the per-class p/r/f values are printed.
    """
    # Fit the model on the training split.
    logging.info("loading trainning data")
    train_x, train_y = load_data(train_f)
    logging.info("training using liblinear")
    model = liblinear_train(train_y, train_x, param)

    logging.info("ready testing data")
    test_x, test_y = load_data(test_f)

    # "-q" is forwarded to liblinearutil.predict; the third returned value
    # is not used by this function.
    logging.info("predict using liblinear")
    p_labels, p_acc, _p_val = liblinearutil.predict(test_y, test_x, model, "-q")

    # Scale the per-class scores by 100 so they print as percentages.
    positive_prf, negative_prf = calc_prf(test_y, p_labels)
    positive_prf = [100 * score for score in positive_prf]
    negative_prf = [100 * score for score in negative_prf]

    # The first entry of p_acc is reported as the accuracy, unscaled.
    print("accuracy = %.2f %%" % (p_acc[0]))
    print("positive class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        positive_prf[0], positive_prf[1], positive_prf[2]))
    print("negative class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        negative_prf[0], negative_prf[1], negative_prf[2]))
def main(train_f, test_f, param):
    """Train with liblinear, derive NBSVM parameters, and print test metrics.

    Args:
        train_f: Training data source, handed to ``load_data``.
        test_f: Test data source, handed to ``load_data``.
        param: Options handed to ``liblinear_train``; ``param['beta']`` is
            additionally read and passed to ``compute_NBSVM_param``.

    Returns:
        None. Accuracy and the per-class p/r/f values are printed.
    """
    # Fit the base linear model on the training split.
    logging.info("loading trainning data")
    train_x, train_y = load_data(train_f)
    logging.info("training using liblinear")
    model = liblinear_train(train_y, train_x, param)

    # Turn the trained model into the (w, b) pair used for NBSVM prediction.
    logging.info('build nbsvm model')
    w, b = compute_NBSVM_param(model, param['beta'])

    logging.info("loading testing data")
    test_x, test_y = load_data(test_f)
    logging.info("predict using nbsvm")
    p_labels = NBSVM_predict(w, b, test_x)

    # Scale the per-class scores by 100 so they print as percentages.
    positive_prf, negative_prf = calc_prf(test_y, p_labels)
    positive_prf = [100 * score for score in positive_prf]
    negative_prf = [100 * score for score in negative_prf]

    # Printed exactly as returned by calc_acc (no x100 scaling here) --
    # presumably already a percentage; TODO confirm.
    acc = calc_acc(test_y, p_labels)
    print("accuracy = %.2f %%" % (acc))
    print("positive class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        positive_prf[0], positive_prf[1], positive_prf[2]))
    print("negative class : p = %6.2f %% , r = %6.2f %% , f = %6.2f%%" % (
        negative_prf[0], negative_prf[1], negative_prf[2]))
from collections import OrderedDict from math import sqrt from file_handler import load_data, load_info, load_item movies = load_item() ratings = load_data() def find_movie_by_title(title): index_title = 1 title_right_index = -7 # index trim for movie in movies: # check movie title by trim it from right first if movie[index_title][:title_right_index] == title: return movie return None def get_movie_id_by_title(title): movie = find_movie_by_title(title) index_movie_id = 0 if movie is not None: return movie[index_movie_id] # return int(movie[index_movie_id]) else: return None def get_jaccard_coefficient(first_movie_id, second_movie_id):
import pandas as pd
import numpy as np
import xgboost as xgb
import utils
import file_handler as fh

""" Script to benchmark the performances of XGBoost Launches cross validations of the model, we can choose some parameters to make vary in order to compare performances and tune the parameters values """

# Load the training set through the project's file handler.
train = fh.load_data('train')

# Data transformation
# Keep the target column aside before it is dropped from the features.
y = train.QuoteConversion_Flag.values
train = train.drop(['QuoteNumber', 'QuoteConversion_Flag'], axis=1)
train = utils.transform_dates(train)
# Missing values are replaced with the sentinel -1.
train = train.fillna(-1)
train = utils.transform_categorical_features_train(train)
# Features and target are wrapped in a DMatrix for xgboost.
data_dm = xgb.DMatrix(train.values, y)

# base values for parameters
# NOTE(review): the dict literal continues beyond this fragment.
xgb_base_parameters = {
    'nthread':-1,
    'n_estimators':25,
    'max_depth':15,
    'learning_rate':0.025,
    'silent':True,
    'subsample': 1,
import xgboost as xgb
import utils
import file_handler as fh

""" Script that loads the models from models folder, performs the prediction on the test dataset and output the result in results folder """

# Columns selected below to build the knn-specific train/test frames.
knn_features = ['PropertyField37','SalesField5','PersonalField9','Field7','PersonalField2',
                'PersonalField1','SalesField4','PersonalField10A','SalesField1B',
                'PersonalField10B', 'PersonalField12']

if __name__ == "__main__":
    # load data
    train = fh.load_data('train')
    test = fh.load_data('test')

    # transform data
    # Keep the target column aside before it is dropped from the features.
    Y_train = train.QuoteConversion_Flag.values
    train = train.drop(['QuoteNumber', 'QuoteConversion_Flag'], axis=1)
    test = test.drop('QuoteNumber', axis=1)
    train = utils.transform_dates(train)
    test = utils.transform_dates(test)
    # Missing values are replaced with the sentinel -1 in both frames.
    train = train.fillna(-1)
    test = test.fillna(-1)
    # Train and test are passed together to the categorical transform.
    train, test = utils.transform_categorical_features_test_train(train, test)

    # transform data for knn
    # Restrict both frames to the knn_features columns.
    knn_train = train.loc[:, knn_features]
    knn_test = test.loc[:, knn_features]