def transform_to_rv_one_user(user_id):
    """Flatten one user's record/feature/value data into record/realization data.

    Loads the user's data through ``RVFDataExtractor.load_rvf``, gives every
    (feature, value position) pair its own integer realization id, rewrites
    each record as a flat list of those ids and stores the result through
    ``RVDataExtractor.save_rv``.

    Args:
        user_id: identifier handed to the loader and to the saver.

    Raises:
        KeyError: if a record references a feature or value that has no entry
            in the id table built from ``valuesnames``.
    """
    print("loading data for user " + str(user_id))
    rfv, featuresnames, valuesnames, recordsdates = RVFDataExtractor.load_rvf(
        user_id)

    # realization_names[i] is the "<feature>__<value>" label of realization i;
    # rvid_table[feature][value position] is the reverse lookup to that i.
    # NOTE(review): ids follow the iteration order of ``valuesnames``; confirm
    # that order is stable if saved ids must be comparable between runs.
    realization_names = []
    rvid_table = {}
    for feature in valuesnames:
        feature_name = featuresnames[feature]
        value_ids = rvid_table[feature] = {}
        for value, value_name in enumerate(valuesnames[feature]):
            label = feature_name + "__" + value_name
            # The next free id is simply the number of labels emitted so far.
            value_ids[value] = len(realization_names)
            realization_names.append(label)

    # Replace every (feature, value) occurrence of a record by its flat id.
    rv_data = {}
    for record in rfv:
        record_features = rfv[record]
        rv_data[record] = [
            rvid_table[feature][value]
            for feature in record_features
            for value in record_features[feature]
        ]

    RVDataExtractor.save_rv(user_id, rv_data, realization_names, recordsdates)
def transform_to_rv_one_user(user_id):
    """Flatten one user's record/feature/value data into record/realization data.

    Loads the user's data through ``RVFDataExtractor.load_rvf``, gives every
    (feature, value position) pair its own integer realization id, rewrites
    each record as a flat list of those ids and stores the result through
    ``RVDataExtractor.save_rv``.

    Args:
        user_id: identifier handed to the loader and to the saver.

    Raises:
        KeyError: if a record references a feature or value that has no entry
            in the id table built from ``valuesnames``.
    """
    print("loading data for user " + str(user_id))
    rfv, featuresnames, valuesnames, recordsdates = RVFDataExtractor.load_rvf(
        user_id)

    # realization_names[i] is the "<feature>__<value>" label of realization i;
    # rvid_table[feature][value position] is the reverse lookup to that i.
    # NOTE(review): ids follow the iteration order of ``valuesnames``; confirm
    # that order is stable if saved ids must be comparable between runs.
    realization_names = []
    rvid_table = {}
    for feature in valuesnames:
        feature_name = featuresnames[feature]
        value_ids = rvid_table[feature] = {}
        for value, value_name in enumerate(valuesnames[feature]):
            label = feature_name + "__" + value_name
            # The next free id is simply the number of labels emitted so far.
            value_ids[value] = len(realization_names)
            realization_names.append(label)

    # Replace every (feature, value) occurrence of a record by its flat id.
    rv_data = {}
    for record in rfv:
        record_features = rfv[record]
        rv_data[record] = [
            rvid_table[feature][value]
            for feature in record_features
            for value in record_features[feature]
        ]

    RVDataExtractor.save_rv(user_id, rv_data, realization_names, recordsdates)
	

	
	
def evaluation_on_day_prediction_one_user(user_id):
    """Evaluate a family of classifiers on the day-prediction task for one user.

    Loads the user's matrix, rfv and rv representations, registers the three
    day-prediction evaluators on a ``FpEvaluatorHandler`` and passes every
    classifier to ``eval_handler.evaluate``.

    Args:
        user_id: identifier handed to the data extractors.

    Returns:
        A two-element list ``[evaluations, classes]``. ``evaluations`` maps
        each classifier label to whatever ``eval_handler.evaluate`` returned
        for it; ``classes`` is ``eval_handler.classes`` read after the last
        evaluation.
    """
    uid = str(user_id)

    print("loading matrix user " + uid + "...")
    data_matrix = MDataExtractor.load_matrix(user_id)
    rows_labels = MDataExtractor.load_labels_vector(user_id)
    columns_labels = MDataExtractor.load_time_vector(user_id)
    # NOTE(review): the importance scores are never used below. The call is
    # kept only in case loading has effects the rest of the run relies on;
    # confirm and drop it.
    MDataExtractor.load_importance_scores(user_id)
    print("user " + uid + " has " + str(len(rows_labels))
          + " features (rows) and " + str(len(columns_labels))
          + " realization (columns)")

    print("loading rfv user " + uid + "...")
    rfvdata, featuresnames, valuesnames, _ = RVFDataExtractor.load_rvf(user_id)
    # Number of distinct values per feature. Features are looked up by the
    # positions 0..n-1, so ``valuesnames`` must be indexable that way.
    vocab_size = [
        len(valuesnames[feature])
        for feature in range(len(valuesnames.keys()))
    ]
    print("user " + uid + " has " + str(len(rfvdata)) + " records")

    print("loading rv user " + uid + "...")
    rv_data, realizationsnames, _ = RVDataExtractor.load_rv(user_id)
    realization_size = len(realizationsnames)

    # Register both baselines. The previous code assigned the dict twice, so
    # the random baseline was constructed and then dropped before evaluation.
    classifiers = {
        "Random ": RandomClassifier(),
        "Most Frequent ": SVDClassifier("idf", 1),
    }

    # One instance of every model per latent dimension k.
    for k in [1, 10, 20, 30, 50, 70]:
        suffix = " k=" + str(k)
        classifiers["SVD" + suffix] = SVDClassifier("idf", k)
        classifiers["LDA" + suffix] = LDA_Value_Classifier(k, realization_size)
        classifiers["HCM_MDT" + suffix] = HCM_MDT_Value_Classifier(
            k, vocab_size)
        classifiers["GHCM_MDT" + suffix] = GHCM_MDT_Value_Classifier(
            k, vocab_size)
        classifiers["LCBMF" + suffix] = LCBMFClassifier(None, None, None, k)

    eval_handler = FpEvaluatorHandler()
    eval_handler.instanciate_m(MDayPredictionEvaluator, data_matrix,
                               rows_labels, columns_labels)
    eval_handler.instanciate_rvf(RfvDayPredictionEvaluator, rfvdata,
                                 featuresnames, valuesnames)
    eval_handler.instantiate_rv(RvDayPredictionEvaluator, rv_data,
                                realizationsnames)

    evaluations = {}
    total = str(len(classifiers))
    for nb, (name, classifier) in enumerate(classifiers.items(), 1):
        print("evaluating " + name + " on user " + uid
              + " nb " + str(nb) + "/" + total)
        # Per the original author's note, a result looks like
        # [good_predictions, total_predictions, accuracy,
        #  macro_average_acc_by_class, accuracy_class1, ..., accuracy_classn].
        evaluations[name] = eval_handler.evaluate(classifier)

    return [evaluations, eval_handler.classes]
# Example no. 4
# 0
def evaluation_on_applaunch_prediction_one_user(user_id):
    """Run the app-launch prediction benchmark for a single user.

    Loads the user's rfv and rv representations, registers the two
    app-launch evaluators on a ``FpEvaluatorHandler`` and passes every
    classifier to ``eval_handler.evaluate``.

    Args:
        user_id: identifier handed to the data extractors.

    Returns:
        A two-element list ``[evaluations, classes]``. ``evaluations`` maps
        each classifier label to whatever ``eval_handler.evaluate`` returned
        for it; ``classes`` is ``eval_handler.classes`` read after the last
        evaluation.
    """
    uid = str(user_id)

    print("loading rfv user " + uid + "...")
    rfvdata, featuresnames, valuesnames, _dates = RVFDataExtractor.load_rvf(
        user_id)
    feature_count = len(valuesnames.keys())
    # Number of distinct values per feature, looked up by position 0..n-1.
    vocab_size = [len(valuesnames[idx]) for idx in range(feature_count)]
    print("user " + uid + " has " + str(len(rfvdata)) + " records")

    print("loading rv user " + uid + "...")
    rv_data, realizationsnames, _dates = RVDataExtractor.load_rv(user_id)
    realization_size = len(realizationsnames)

    # NOTE(review): the first dict is replaced by the very next statement, so
    # the random baseline is constructed but never evaluated. The comments in
    # the original ("add the random classifier") suggest both were meant to be
    # kept. No matrix evaluator is registered in this function, so confirm the
    # handler can evaluate RandomClassifier here before merging the two.
    classifiers = {"Random ": RandomClassifier()}
    classifiers = {"Most Frequent ": HCM_MDT_Value_Classifier(1, vocab_size)}

    # One instance of every model per latent dimension k.
    for k in (1, 10, 20, 30, 50, 70):
        suffix = " k=" + str(k)
        classifiers["LDA" + suffix] = LDA_Value_Classifier(k, realization_size)
        classifiers["GHCM_MDT" + suffix] = GHCM_MDT_Value_Classifier(
            k, vocab_size)
        classifiers["HCM_MDT" + suffix] = HCM_MDT_Value_Classifier(
            k, vocab_size)

    eval_handler = FpEvaluatorHandler()
    eval_handler.instanciate_rvf(
        RfvApplaunchPredictionEvaluator, rfvdata, featuresnames, valuesnames)
    eval_handler.instantiate_rv(
        RvApplaunchPredictionEvaluator, rv_data, realizationsnames)

    evaluations = {}
    total = str(len(classifiers))
    for position, (label, model) in enumerate(classifiers.items(), 1):
        print("evaluating " + label + " on user " + uid
              + " nb " + str(position) + "/" + total)
        # Per the original author's note, a result looks like
        # [good_predictions, total_predictions, accuracy,
        #  macro_average_acc_by_class, accuracy_class1, ..., accuracy_classn].
        evaluations[label] = eval_handler.evaluate(model)

    return [evaluations, eval_handler.classes]
#!/usr/bin/env python
import sys
sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from transform_to_rv_one_user import transform_to_rv_one_user as ttrou
from rv_data_utils import RVDataExtractor


# Make the categorized data transformation for all the users.
for user_id in RVDataExtractor.users_ids_list():
    ttrou(user_id)
    print("user " + str(user_id) + " extracted")
    print("")  # blank line between users
# Example no. 6
# 0
def evaluation_on_location_prediction_one_user(user_id):
    """Evaluate a family of classifiers on the location-prediction task.

    Loads the user's matrix, rfv and rv representations, registers the three
    location-prediction evaluators on a ``FpEvaluatorHandler`` and passes
    every classifier to ``eval_handler.evaluate``.

    Args:
        user_id: identifier handed to the data extractors.

    Returns:
        A two-element list ``[evaluations, classes]``. ``evaluations`` maps
        each classifier label to whatever ``eval_handler.evaluate`` returned
        for it; ``classes`` is ``eval_handler.classes`` read after the last
        evaluation.
    """
    uid = str(user_id)

    print("loading matrix user " + uid + "...")
    data_matrix = MDataExtractor.load_matrix(user_id)
    rows_labels = MDataExtractor.load_labels_vector(user_id)
    columns_labels = MDataExtractor.load_time_vector(user_id)
    # NOTE(review): the importance scores are never used below. The call is
    # kept only in case loading has effects the rest of the run relies on;
    # confirm and drop it.
    MDataExtractor.load_importance_scores(user_id)
    print("user " + uid + " has " + str(len(rows_labels))
          + " features (rows) and " + str(len(columns_labels))
          + " realization (columns)")

    print("loading rfv user " + uid + "...")
    rfvdata, featuresnames, valuesnames, _ = RVFDataExtractor.load_rvf(user_id)
    # Number of distinct values per feature. Features are looked up by the
    # positions 0..n-1, so ``valuesnames`` must be indexable that way.
    vocab_size = [
        len(valuesnames[feature])
        for feature in range(len(valuesnames.keys()))
    ]
    print("user " + uid + " has " + str(len(rfvdata)) + " records")

    print("loading rv user " + uid + "...")
    rv_data, realizationsnames, _ = RVDataExtractor.load_rv(user_id)
    realization_size = len(realizationsnames)

    # Register both baselines. The previous code assigned the dict twice, so
    # the random baseline was constructed and then dropped before evaluation.
    classifiers = {
        "Random ": RandomClassifier(),
        "Most Frequent ": SVDClassifier("idf", 1),
    }

    # One instance of every model per latent dimension k.
    for k in [1, 10, 20, 30, 50, 70]:
        suffix = " k=" + str(k)
        classifiers["SVD" + suffix] = SVDClassifier("idf", k)
        classifiers["LDA" + suffix] = LDA_Value_Classifier(k, realization_size)
        classifiers["HCM_MDT" + suffix] = HCM_MDT_Value_Classifier(
            k, vocab_size)
        classifiers["GHCM_MDT" + suffix] = GHCM_MDT_Value_Classifier(
            k, vocab_size)
        classifiers["LCBMF" + suffix] = LCBMFClassifier(None, None, None, k)

    eval_handler = FpEvaluatorHandler()
    eval_handler.instanciate_m(MLocationPredictionEvaluator, data_matrix,
                               rows_labels, columns_labels)
    eval_handler.instanciate_rvf(RfvLocationPredictionEvaluator, rfvdata,
                                 featuresnames, valuesnames)
    eval_handler.instantiate_rv(RvLocationPredictionEvaluator, rv_data,
                                realizationsnames)

    evaluations = {}
    for name, classifier in classifiers.items():
        print("evaluating " + name + " on user " + uid)
        # Per the original author's note, a result looks like
        # [good_predictions, total_predictions, accuracy,
        #  macro_average_acc_by_class, accuracy_class1, ..., accuracy_classn].
        evaluations[name] = eval_handler.evaluate(classifier)

    return [evaluations, eval_handler.classes]
#!/usr/bin/env python
import sys
sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from transform_to_rv_one_user import transform_to_rv_one_user as ttrou
from rv_data_utils import RVDataExtractor
# Make the categorized data transformation for all the users.
for user_id in RVDataExtractor.users_ids_list():
    ttrou(user_id)
    print("user " + str(user_id) + " extracted")
    print("")  # blank line between users