def _apply_pretransformation(self, matrix, pretransformation_name):
    """Apply the named pre-transformation to ``matrix``.

    Supported names:

    * ``None`` / ``"none"`` -- no transformation: a copy of ``matrix`` is
      returned together with an all-ones score vector whose length is
      ``np.shape(matrix)[LCBMFComputer.feature_dimention]``.
    * ``"idf"``, ``"idc"``, ``"idf3"`` -- delegate to the matching
      ``Numpy.*_matrix_transformation`` helper along
      ``LCBMFComputer.time_dimention`` with Laplace smoothing enabled.
    * ``"ldc"`` -- delegates to ``Numpy.ldc_matrix_transformation`` along
      ``LCBMFComputer.time_dimention`` (that helper takes no smoothing flag).

    :param matrix: the data to transform; it is passed to the helpers and is
        never assigned to in this method.
    :param pretransformation_name: one of the names listed above.
    :return: the list ``[transformed_trainset, transformation_scores_by_feature]``.
    :raises ValueError: if ``pretransformation_name`` is not a supported name.
        ``ValueError`` is a subclass of ``Exception``, so handlers written
        against the generic ``Exception`` keep working.
    """
    # Identity test for None instead of "== None".
    if pretransformation_name is None or pretransformation_name == "none":
        # All-ones scores (one per feature) are equivalent to not having made
        # any transformation. The copy is only needed in this branch: every
        # other branch gets its result from a helper.
        transformed_trainset = np.copy(matrix)
        transformation_scores_by_feature = np.ones(
            np.shape(matrix)[LCBMFComputer.feature_dimention])
    elif pretransformation_name == "idf":
        do_laplace_smoothing = True
        transformed_trainset, transformation_scores_by_feature = \
            Numpy.idf_matrix_transformation(
                matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
    elif pretransformation_name == "ldc":
        transformed_trainset, transformation_scores_by_feature = \
            Numpy.ldc_matrix_transformation(
                matrix, LCBMFComputer.time_dimention)
    elif pretransformation_name == "idc":
        do_laplace_smoothing = True
        transformed_trainset, transformation_scores_by_feature = \
            Numpy.idc_matrix_transformation(
                matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
    elif pretransformation_name == "idf3":
        do_laplace_smoothing = True
        transformed_trainset, transformation_scores_by_feature = \
            Numpy.idf3_matrix_transformation(
                matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
    else:
        # %-formatting (rather than "+" concatenation) so that a non-string
        # name still produces this error instead of a TypeError raised while
        # building the message.
        raise ValueError(
            "WRONG TRANSFORMATION EXCEPTION : the transformation %s do not exist"
            % (pretransformation_name,))

    return [transformed_trainset, transformation_scores_by_feature]
def transformations_comparaison_one_user(user_id):
    """Register several transformations of one user's matrix and compare them.

    Resets the module-level result containers, loads the user's data through
    ``MDataExtractor``, passes the raw matrix and each transformed variant to
    ``add_transformation`` under a fixed label, and finally calls
    ``compare("presence_count", user_id)``.

    ``time_dimention`` and ``do_laplace_smoothing`` are not defined here; they
    are read from the enclosing (module) scope.

    :param user_id: identifier forwarded to the ``MDataExtractor`` loaders and
        to ``compare``; also used (via ``str``) to build ``file_name``.
    :return: None. Results are left in the module-level globals.
    """
    global file_name
    global rows_labels
    global labels_importance
    global labels_importance_derivative
    global labels_importance_rank
    global transformation_vectors

    # Start from a clean slate so nothing from a previous user leaks through.
    labels_importance = {}
    labels_importance_derivative = {}
    labels_importance_rank = {}
    transformation_vectors = {}
    rows_labels = None
    file_name = "transformations_comparaison_" + str(user_id)

    # Single-argument parenthesised print: same output with the Python 2
    # print statement and with the Python 3 print function.
    print("loading matrix user " + str(user_id) + "...")
    data_matrix = MDataExtractor.load_matrix(user_id)
    rows_labels = MDataExtractor.load_labels_vector(user_id)
    # NOTE(review): the two values below are loaded but never used in this
    # function. The calls are kept because removing them could change I/O
    # behaviour -- confirm whether they can be dropped.
    columns_labels = MDataExtractor.load_time_vector(user_id)
    importance_scores = MDataExtractor.load_importance_scores(user_id)

    add_transformation(data_matrix, "presence_count")

    # Raw values scaled by 100 over the matrix size along time_dimention;
    # float() keeps the division a true division on Python 2 as well.
    time_axis_size = float(np.size(data_matrix, time_dimention))
    add_transformation(data_matrix * 100 / time_axis_size, "presence_percentage")

    # NOTE(review): whatever each Numpy.*_matrix_transformation helper returns
    # is forwarded unchanged to add_transformation -- confirm that
    # add_transformation accepts that return value as-is.
    idf_matrix = Numpy.idf_matrix_transformation(
        data_matrix, time_dimention, do_laplace_smoothing)
    add_transformation(idf_matrix, "idf_score")

    # The idf2 ("idf2_score") and idflog10 ("idflog10_score") variants are
    # disabled and are not registered here.

    idf3_matrix = Numpy.idf3_matrix_transformation(
        data_matrix, time_dimention, do_laplace_smoothing)
    add_transformation(idf3_matrix, "idf3_score")

    ldc_matrix = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)
    add_transformation(ldc_matrix, "ldc_score")

    idc_matrix = Numpy.idc_matrix_transformation(
        data_matrix, time_dimention, do_laplace_smoothing)
    add_transformation(idc_matrix, "idc_score")

    compare("presence_count", user_id)