def evaluation_on_feature_prediction_all_users(feature_name):
    """Run the per-user prediction evaluation for ``feature_name`` over every
    user and aggregate the classifier scores.

    feature_name -- one of "day", "location" or "applaunch"; selects which
                    per-user evaluation function (eodpou/eolpou/eoapou) is run.

    Side effects: writes two dictionaries to a log file named after the
    feature -- the scores combined over all users (mixed_evaluations) and the
    per-user detail (detailed_evaluations).

    Raises Exception when feature_name is not one of the supported values.
    """
    # scores of the different predictors combined over all the users
    mixed_evaluations = {}
    # per-user score details of the different predictors
    detailed_evaluations = {}
    # number of users already integrated, used for the running averages
    # (renamed from `iter`, which shadowed the builtin)
    users_done = 0
    for user_id in MDataExtractor.users_ids_list():
        print("user " + str(user_id) + " on working")
        if feature_name == "day":
            evaluations, classes = eodpou(user_id)
            file_name = "evaluation_day_prediction"
        elif feature_name == "location":
            evaluations, classes = eolpou(user_id)
            file_name = "evaluation_location_prediction"
        elif feature_name == "applaunch":
            evaluations, classes = eoapou(user_id)
            file_name = "evaluation_applaunch_prediction"
        else:
            # fixed grammar of the original message
            raise Exception("NOT IMPLEMENTED FEATURE EXCEPTION: the feature "
                            + str(feature_name)
                            + " is not implemented or does not exist")

        if not detailed_evaluations:
            # first user: create one entry per classifier in both dictionaries
            for classifier_name in evaluations:
                mixed_evaluations[classifier_name] = {
                    "correct predictions": 0,
                    "total predictions": 0,
                    "accuracy": 0,
                    "average accuracy": 0,
                    "average macro_accuracy": 0,
                }
                detailed_evaluations[classifier_name] = {}

        # integrate the evaluations of the current user
        for classifier_name, evaluation_results in evaluations.items():
            # evaluation_results = [good_predictions, total_predictions,
            #   accuracy, macro_average_acc_by_class,
            #   accuracy_class1, ..., accuracy_classn]
            mixed = mixed_evaluations[classifier_name]
            mixed["correct predictions"] += evaluation_results[0]
            mixed["total predictions"] += evaluation_results[1]
            mixed["accuracy"] = (mixed["correct predictions"] * 1.0) \
                / mixed["total predictions"]
            # incremental (running) means over the users seen so far
            mixed["average accuracy"] = (
                (mixed["average accuracy"] * users_done)
                + evaluation_results[2]) / ((users_done + 1) * 1.0)
            mixed["average macro_accuracy"] = (
                (mixed["average macro_accuracy"] * users_done)
                + evaluation_results[3]) / ((users_done + 1) * 1.0)
            # update the detailed evaluations
            detailed_evaluations[classifier_name]["user " + str(user_id)] = {
                "correct predictions": evaluation_results[0],
                "total predictions": evaluation_results[1],
                "accuracy": evaluation_results[2],
                "macro_accuracy": evaluation_results[3],
                "accuracy by class": evaluation_results[4:],
            }
        users_done += 1
        print("user " + str(user_id) + " extracted")

    # write the dictionaries into a log file
    out = LogsFileWriter.open(file_name)
    LogsFileWriter.write("predictions on the classes "
                         + JsonUtils.dict_as_json_str(classes) + "\n", out)
    LogsFileWriter.write("Total scores :\n", out)
    LogsFileWriter.write(JsonUtils.dict_as_json_str(mixed_evaluations), out)
    LogsFileWriter.write("detailed scores :\n", out)
    LogsFileWriter.write(JsonUtils.dict_as_json_str(detailed_evaluations), out)
    LogsFileWriter.close(out)
def compare(reference_transformation, user_id):
    """Compare feature importances and ranks across transformations.

    Rebuilds the module-level ``labels_importance`` and
    ``labels_importance_rank`` dictionaries from ``transformation_vectors``,
    sorts them by the scores of ``reference_transformation``, logs them, and
    draws comparison plots.

    NOTE(review): depends on module globals ``transformation_vectors``,
    ``rows_labels`` and ``file_name`` being set by the caller -- confirm
    against the enclosing module.
    """
    global labels_importance
    global labels_importance_rank
    #global labels_importance_derivative
    # row index into the per-transformation score/rank vectors; advances in
    # lockstep with rows_labels
    index = 0
    transformations = transformation_vectors.keys()
    for label in rows_labels:
        labels_importance[label] = {}
        labels_importance_rank[label] = {}
        for transformation in transformations:
            # vector [0] holds the scores, vector [1] holds the ranks
            labels_importance[label][transformation]=transformation_vectors[transformation][0][index]
            labels_importance_rank[label][transformation]= transformation_vectors[transformation][1][index]
            #labels_importance_derivative[label][transformation]= transformation_vectors[transformation][2][index]
        index +=1
    # sort the dictionaries per presence rate: the most frequent feature at
    # the beginning (scores descending, ranks ascending)
    labels_importance = collections.OrderedDict(sorted(labels_importance.items(), key=lambda x: x[1][reference_transformation], reverse = True))
    #labels_importance_derivative = collections.OrderedDict(sorted(labels_importance_derivative.items(), key=lambda x: x[1][reference_transformation], reverse = True))
    labels_importance_rank = collections.OrderedDict(sorted(labels_importance_rank.items(), key=lambda x: x[1][reference_transformation]))
    print JsonUtils.dict_as_json_str(labels_importance)
    print JsonUtils.dict_as_json_str(labels_importance_rank)
    #print np.shape(data_matrix)
    # write the dictionaries into files
    out = LogsFileWriter.open(file_name)
    LogsFileWriter.write(JsonUtils.dict_as_json_str(labels_importance),out)
    LogsFileWriter.write(JsonUtils.dict_as_json_str(labels_importance_rank),out)
    LogsFileWriter.close(out)
    # plot the records importance vs different transformation scores:
    # one score list / rank list / derivative list per transformation
    importances_list = []
    importances_legends = []
    ranks_list = []
    ranks_legends = []
    importances_derivatives_list = []
    importances_derivatives_legends = []
    for transformation in transformations:
        importance_list = [importance[transformation] for importance in labels_importance.values()]
        importances_list.append(importance_list)
        importances_legends.append(transformation)
        rank_list = [rank[transformation] for rank in labels_importance_rank.values()]
        ranks_list.append(rank_list)
        ranks_legends.append(transformation)
        # first-order discrete derivative of the score curve
        # (one element shorter than importance_list)
        importance_derivative_list = np.diff(np.asarray(importance_list), 1).tolist()
        importances_derivatives_list.append(importance_derivative_list)
        importances_derivatives_legends.append(transformation)
    # horizontal y=0 baseline appended for the derivative plot
    importances_derivatives_list.append([0]*len(importances_derivatives_list[0]))
    importances_derivatives_legends.append("y=0")
    PlotlibDrawer.plot_1(labels_importance.keys(), [percentage["presence_percentage"] for percentage in labels_importance.values()], "features rank", "% records", "presence rate of the features in the records", 10)
    PlotlibDrawer.plot_2(labels_importance.keys(), importances_list, importances_legends, "features rank", "features scores", "comparison of different transformation scores "+str(user_id), 11)
    PlotlibDrawer.plot_2(labels_importance_rank.keys(), ranks_list, ranks_legends, "features initial rank", "features rank after transformation", "comparison of different transformation ranks "+str(user_id), 11)
    PlotlibDrawer.plot_2(labels_importance.keys(), importances_derivatives_list, importances_derivatives_legends, "features initial rank", "features scores derivative", "comparison of different transformation scores derivative "+str(user_id), 11)
#!/usr/bin/env python
"""Compute the features presence rate for every user and write it to a log.

Scans the json data directory for each user's validated log file, computes
the per-user features presence rate and appends all results into one log
file ("features_presence_rate").
"""
import sys
import pprint as pp
import os.path

sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from features_presence_rate_one_user import features_presence_rate_one_user
from logs_file_writer import LogsFileWriter

json_data_dir = "/speech/dbwork/mul/reco1/AppPrediction/SonyLogging/Logs/from_TKY/pulled_from_TKY/mixs_launcher_logs/json/"

# array containing the path to the validated json data for each user
users_json_files_array = [json_data_dir + x + "/all/all_in_one_validated_log.json"
                          for x in os.listdir(json_data_dir)]
pp.pprint(users_json_files_array)

# accumulate the per-user reports in a list and join once at the end
# (the original repeated string concatenation is quadratic)
report_parts = []
user_number = 1
for json_file in users_json_files_array:
    if os.path.isfile(json_file):
        report_parts.append("\n \n \n user " + str(user_number) + "\n"
                            + features_presence_rate_one_user(json_file))
        print("user " + str(user_number) + " extracted")
        user_number += 1
features_presence_rate = ''.join(report_parts)

# write the results to the log file
stream = LogsFileWriter.open("features_presence_rate")
LogsFileWriter.write(features_presence_rate, stream)
LogsFileWriter.close(stream)
"""List the different event types and their occurrence counts in the dataset.

Writes one event list per user plus the aggregated, frequency-sorted list of
all users to the "events_list" log file.
"""
import sys          # BUG FIX: sys.path was used below without importing sys
import json         # BUG FIX: json was used below without an explicit import
import collections

sys.path.insert(0, "/home/dehajjik/workspace/src/utils")
from data_utils import *
from list_events_one_user import list_events_one_user as leou
from logs_file_writer import LogsFileWriter

stream = LogsFileWriter.open("events_list")
LogsFileWriter.write(
    "different events types and their number of occurrences in the dataset \n\n",
    stream)

# aggregated occurrence counts over every user
events_list = collections.Counter()
for user_id in DataExtractor.users_ids_list():
    current_list = collections.Counter(leou(user_id))
    events_list += current_list
    LogsFileWriter.write("\n\n user " + str(user_id) + " event list : \n", stream)
    # json.dumps already returns a string; the original str() wrapper was redundant
    LogsFileWriter.write(json.dumps(current_list, indent=4), stream)
    print("user " + str(user_id) + " extracted")

# sort the events by decreasing number of occurrences
events_list = collections.OrderedDict(
    sorted(events_list.items(), key=lambda t: t[1], reverse=True))
LogsFileWriter.write("\n\n all users notification list : \n", stream)
LogsFileWriter.write(json.dumps(events_list, indent=4), stream)
LogsFileWriter.close(stream)
def evaluation_on_feature_prediction_all_users(feature_name):
    """Evaluate the ``feature_name`` predictors on every user and aggregate.

    feature_name -- "day", "location" or "applaunch"; picks the per-user
                    evaluation routine (eodpou/eolpou/eoapou).

    Side effects: logs the combined scores (over all users) and the per-user
    detailed scores to a feature-specific log file.

    Raises Exception for an unsupported feature_name.
    """
    # scores of the different predictors combined over all the users
    mixed_evaluations = {}
    # per-user score details of the different predictors
    detailed_evaluations = {}
    # count of users folded into the running averages
    # (renamed from `iter`, which shadowed the builtin)
    users_done = 0
    for user_id in MDataExtractor.users_ids_list():
        print("user " + str(user_id) + " on working")
        if feature_name == "day":
            evaluations, classes = eodpou(user_id)
            file_name = "evaluation_day_prediction"
        elif feature_name == "location":
            evaluations, classes = eolpou(user_id)
            file_name = "evaluation_location_prediction"
        elif feature_name == "applaunch":
            evaluations, classes = eoapou(user_id)
            file_name = "evaluation_applaunch_prediction"
        else:
            # fixed grammar of the original message
            raise Exception("NOT IMPLEMENTED FEATURE EXCEPTION: the feature "
                            + str(feature_name)
                            + " is not implemented or does not exist")

        if not detailed_evaluations:
            # first user: instantiate one entry per classifier in both dicts
            for classifier_name in evaluations:
                mixed_evaluations[classifier_name] = {
                    "correct predictions": 0,
                    "total predictions": 0,
                    "accuracy": 0,
                    "average accuracy": 0,
                    "average macro_accuracy": 0,
                }
                detailed_evaluations[classifier_name] = {}

        # integrate the evaluations of the current user
        for classifier_name, evaluation_results in evaluations.items():
            # evaluation_results = [good_predictions, total_predictions,
            #   accuracy, macro_average_acc_by_class,
            #   accuracy_class1, ..., accuracy_classn]
            mixed = mixed_evaluations[classifier_name]
            mixed["correct predictions"] += evaluation_results[0]
            mixed["total predictions"] += evaluation_results[1]
            mixed["accuracy"] = (mixed["correct predictions"] * 1.0) \
                / mixed["total predictions"]
            # running means over the users processed so far
            mixed["average accuracy"] = (
                (mixed["average accuracy"] * users_done)
                + evaluation_results[2]) / ((users_done + 1) * 1.0)
            mixed["average macro_accuracy"] = (
                (mixed["average macro_accuracy"] * users_done)
                + evaluation_results[3]) / ((users_done + 1) * 1.0)
            # update the detailed evaluations
            detailed_evaluations[classifier_name]["user " + str(user_id)] = {
                "correct predictions": evaluation_results[0],
                "total predictions": evaluation_results[1],
                "accuracy": evaluation_results[2],
                "macro_accuracy": evaluation_results[3],
                "accuracy by class": evaluation_results[4:],
            }
        users_done += 1
        print("user " + str(user_id) + " extracted")

    # write the dictionaries into a log file
    out = LogsFileWriter.open(file_name)
    LogsFileWriter.write("predictions on the classes "
                         + JsonUtils.dict_as_json_str(classes) + "\n", out)
    LogsFileWriter.write("Total scores :\n", out)
    LogsFileWriter.write(JsonUtils.dict_as_json_str(mixed_evaluations), out)
    LogsFileWriter.write("detailed scores :\n", out)
    LogsFileWriter.write(JsonUtils.dict_as_json_str(detailed_evaluations), out)
    LogsFileWriter.close(out)
def location_distribution_per_hour_one_user(user_id, option):
    """Plot the hourly distribution of a user's k most frequent locations.

    user_id -- user whose location data is loaded through DataExtractor
    option  -- filtering option (one of the module-level ``options``: week-end
               days / week days / all days); when None the module-level
               ``hardcoded_option`` is used.

    Side effects: draws the top-k Pr[location|hour] distribution with
    PlotlibDrawer and logs the metadata of the top-k location clusters.

    NOTE(review): relies on module globals ``options``, ``hardcoded_option``
    and ``k`` -- confirm they are defined in the enclosing module.
    """
    # identity test against the None sentinel instead of `!= None`
    if option is not None:
        current_option = option
    else:
        current_option = hardcoded_option

    complete_data = DataExtractor.load_json_data(user_id)
    # location realizations ordered by their date key
    location_data = collections.OrderedDict(
        sorted(
            DataExtractor.data(DataExtractor.location_name,
                               complete_data).items()))
    location_metadata = DataExtractor.metadata(DataExtractor.location_name,
                                               complete_data)

    # counts per hour-of-day (24 bins) for each location
    location_count = {}
    # total number of marked hours (visits) for each location
    location_freq = {}
    # guarantee that the same date is not marked more than once when several
    # realizations map to the same hour
    most_recent_date = None
    most_recent_location = None
    iteration = 0  # renamed from `iter`, which shadowed the builtin
    for date, location_feature in location_data.iteritems():
        print("iteration " + str(iteration) + " from " + str(len(location_data)))
        iteration += 1
        location = location_feature[DataExtractor.location_attribute]
        if location not in location_count:
            location_count[location] = np.zeros(24)
            location_freq[location] = 0.0
        start_date = DataExtractor.start_date_of_realization(date)
        end_date = DataExtractor.end_date_of_realization(date)
        # one datetime per hour covered between start date and end date
        hours_covered = DateTimeUtils.hours_between(start_date, end_date)
        # increment by 1 the concerned hours for this location
        for datetime in hours_covered:
            if datetime != most_recent_date or location != most_recent_location:
                # either a new date to mark or a new location
                if ((current_option == options[0]
                     and DateTimeUtils.is_week_end_day(datetime))
                        or (current_option == options[1]
                            and not DateTimeUtils.is_week_end_day(datetime))
                        or (current_option == options[2])):
                    # fits the option filter
                    location_count[location][datetime.hour] += 1.0
                    location_freq[location] += 1.0
                    most_recent_date = datetime
                    most_recent_location = location

    # order the locations by decreasing frequency
    location_freq = collections.OrderedDict(
        sorted(location_freq.items(), key=lambda tup: tup[1], reverse=True))
    ordered_locations = location_freq.keys()

    # stack the per-location hourly counts into an (n_locations, 24) matrix,
    # rows ordered by decreasing frequency (enumerate replaces the manual
    # `idex` counter)
    np_matrix = np.zeros((len(ordered_locations), 24))
    for row, location in enumerate(ordered_locations):
        np_matrix[row, :] = location_count[location]

    # normalize each hour column to get Pr[location|hour]; keep the top-k rows
    # NOTE(review): an hour with no visit at all yields a 0/0 division here --
    # confirm the data always covers every hour
    column_sums = np_matrix.sum(axis=0)
    location_dist = np_matrix[0:k, :] / column_sums[np.newaxis, :]

    ld_title = ("top-" + str(k) + " distribution of locations by hour "
                + str(current_option) + " for user " + str(user_id))
    ld_x = "hours"
    ld_y = "Pr[location|hour]"
    # legend: cluster id with its radius (r) and center (c) from the metadata
    legends = [
        str(location) + ", r="
        + str(location_metadata[DataExtractor.location_attribute][str(location)][
            DataExtractor.location_metaradius])
        + ", c="
        + str(location_metadata[DataExtractor.location_attribute][str(location)][
            DataExtractor.location_metacenter])
        for location in ordered_locations[0:k]
    ]
    PlotlibDrawer.plot_np(None, location_dist, legends, ld_x, ld_y, ld_title, 0)

    # log the content of the k most frequent location clusters
    stream = LogsFileWriter.open("most_frequent_location_clusters_"
                                 + str(current_option) + "_user" + str(user_id))
    for location in ordered_locations[0:k]:
        LogsFileWriter.write(
            "location " + str(location) + "\n"
            + JsonUtils.dict_as_json_str(location_metadata[
                DataExtractor.location_attribute][str(location)])
            + "\n\n\n", stream)
    LogsFileWriter.close(stream)
# NOTE(review): fragment -- the enclosing function and user loop are not
# visible in this chunk. `user_id`, `iter`, `file_name`, `mixed_evaluations`,
# `detailed_evaluations` and `epou` are all defined by the surrounding
# (unseen) code; the aggregation pattern mirrors
# evaluation_on_feature_prediction_all_users, but with perplexity scores.
# per-classifier perplexity scores for the current user
evaluations = epou(user_id)
if len(detailed_evaluations.keys()) == 0:
    # first user: instantiate one entry per classifier in both result dicts
    for classifier_name, evaluation_results in evaluations.iteritems():
        mixed_evaluations[classifier_name] = {"average perplexity": 0}
        detailed_evaluations[classifier_name] = {}
# integrate the evaluations of the current user
for classifier_name, perplexity in evaluations.iteritems():
    # running mean of perplexity over the `iter` users seen so far
    mixed_evaluations[classifier_name]["average perplexity"] = (
        (mixed_evaluations[classifier_name]["average perplexity"] * iter)
        + perplexity) / ((iter + 1) * 1.0)
    # update the detailed evaluations
    detailed_evaluations[classifier_name]["user " + str(user_id)] = {
        "perplexity": perplexity
    }
iter += 1
print("user " + str(user_id) + " extracted")
# write the dictionaries into files
out = LogsFileWriter.open(file_name)
LogsFileWriter.write("perplexity \n", out)
LogsFileWriter.write("Total scores :\n", out)
LogsFileWriter.write(JsonUtils.dict_as_json_str(mixed_evaluations), out)
LogsFileWriter.write("detailed scores :\n", out)
LogsFileWriter.write(JsonUtils.dict_as_json_str(detailed_evaluations), out)
LogsFileWriter.close(out)
def location_distribution_per_hour_one_user(user_id, option):
    """Plot the hourly distribution of a user's k most frequent locations.

    user_id -- user whose location data is loaded through DataExtractor
    option  -- filtering option (one of the module-level ``options``: week-end
               days / week days / all days); when None the module-level
               ``hardcoded_option`` is used.

    Side effects: draws the top-k Pr[location|hour] distribution with
    PlotlibDrawer and logs the metadata of the top-k location clusters.

    NOTE(review): relies on module globals ``options``, ``hardcoded_option``
    and ``k`` -- confirm they are defined in the enclosing module.
    """
    # identity test against the None sentinel instead of `!= None`
    if option is not None:
        current_option = option
    else:
        current_option = hardcoded_option

    complete_data = DataExtractor.load_json_data(user_id)
    # location realizations ordered by their date key
    location_data = collections.OrderedDict(
        sorted(
            DataExtractor.data(DataExtractor.location_name,
                               complete_data).items()))
    location_metadata = DataExtractor.metadata(DataExtractor.location_name,
                                               complete_data)

    # counts per hour-of-day (24 bins) for each location
    location_count = {}
    # total number of marked hours (visits) for each location
    location_freq = {}
    # guarantee that the same date is not marked more than once when several
    # realizations map to the same hour
    most_recent_date = None
    most_recent_location = None
    iteration = 0  # renamed from `iter`, which shadowed the builtin
    for date, location_feature in location_data.iteritems():
        print("iteration " + str(iteration) + " from " + str(len(location_data)))
        iteration += 1
        location = location_feature[DataExtractor.location_attribute]
        if location not in location_count:
            location_count[location] = np.zeros(24)
            location_freq[location] = 0.0
        start_date = DataExtractor.start_date_of_realization(date)
        end_date = DataExtractor.end_date_of_realization(date)
        # one datetime per hour covered between start date and end date
        hours_covered = DateTimeUtils.hours_between(start_date, end_date)
        # increment by 1 the concerned hours for this location
        for datetime in hours_covered:
            if datetime != most_recent_date or location != most_recent_location:
                # either a new date to mark or a new location
                if ((current_option == options[0]
                     and DateTimeUtils.is_week_end_day(datetime))
                        or (current_option == options[1]
                            and not DateTimeUtils.is_week_end_day(datetime))
                        or (current_option == options[2])):
                    # fits the option filter
                    location_count[location][datetime.hour] += 1.0
                    location_freq[location] += 1.0
                    most_recent_date = datetime
                    most_recent_location = location

    # order the locations by decreasing frequency
    location_freq = collections.OrderedDict(
        sorted(location_freq.items(), key=lambda tup: tup[1], reverse=True))
    ordered_locations = location_freq.keys()

    # stack the per-location hourly counts into an (n_locations, 24) matrix,
    # rows ordered by decreasing frequency (enumerate replaces the manual
    # `idex` counter)
    np_matrix = np.zeros((len(ordered_locations), 24))
    for row, location in enumerate(ordered_locations):
        np_matrix[row, :] = location_count[location]

    # normalize each hour column to get Pr[location|hour]; keep the top-k rows
    # NOTE(review): an hour with no visit at all yields a 0/0 division here --
    # confirm the data always covers every hour
    column_sums = np_matrix.sum(axis=0)
    location_dist = np_matrix[0:k, :] / column_sums[np.newaxis, :]

    ld_title = ("top-" + str(k) + " distribution of locations by hour "
                + str(current_option) + " for user " + str(user_id))
    ld_x = "hours"
    ld_y = "Pr[location|hour]"
    # legend: cluster id with its radius (r) and center (c) from the metadata
    legends = [
        str(location) + ", r="
        + str(location_metadata[DataExtractor.location_attribute][str(location)][
            DataExtractor.location_metaradius])
        + ", c="
        + str(location_metadata[DataExtractor.location_attribute][str(location)][
            DataExtractor.location_metacenter])
        for location in ordered_locations[0:k]
    ]
    PlotlibDrawer.plot_np(None, location_dist, legends, ld_x, ld_y, ld_title, 0)

    # log the content of the k most frequent location clusters
    stream = LogsFileWriter.open("most_frequent_location_clusters_"
                                 + str(current_option) + "_user" + str(user_id))
    for location in ordered_locations[0:k]:
        LogsFileWriter.write(
            "location " + str(location) + "\n"
            + JsonUtils.dict_as_json_str(location_metadata[
                DataExtractor.location_attribute][str(location)])
            + "\n\n\n", stream)
    LogsFileWriter.close(stream)