def _apply_pretransformation(self, matrix, pretransformation_name):
    """Apply the named pretransformation to a copy of *matrix*.

    Parameters
    ----------
    matrix : numpy array of raw counts, indexed by the class-level
        ``feature_dimention`` / ``time_dimention`` axis constants.
    pretransformation_name : None or one of "none", "idf", "ldc", "idc",
        "idf3".

    Returns
    -------
    [transformed_trainset, transformation_scores_by_feature] : the
        transformed copy of *matrix* and the per-feature score vector
        produced by the transformation (all ones when no transformation
        is applied, which is equivalent to not transforming at all).

    Raises
    ------
    Exception : when *pretransformation_name* is not a known transformation.
    """
    transformed_trainset = np.copy(matrix)
    # "is None" (identity), not "== None": the PEP 8 idiom for singletons.
    if pretransformation_name == "none" or pretransformation_name is None:
        # Initialize the transformation scores to an array of ones sized to
        # the number of features; equivalent to not having transformed.
        transformation_scores_by_feature = np.ones(np.shape(matrix)[LCBMFComputer.feature_dimention])
    elif pretransformation_name == "idf":
        do_laplace_smoothing = True
        [transformed_trainset, transformation_scores_by_feature] = Numpy.idf_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
    elif pretransformation_name == "ldc":
        [transformed_trainset, transformation_scores_by_feature] = Numpy.ldc_matrix_transformation(matrix, LCBMFComputer.time_dimention)
    elif pretransformation_name == "idc":
        do_laplace_smoothing = True
        [transformed_trainset, transformation_scores_by_feature] = Numpy.idc_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
    elif pretransformation_name == "idf3":
        do_laplace_smoothing = True
        [transformed_trainset, transformation_scores_by_feature] = Numpy.idf3_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
    else:
        # str() guards against a TypeError when a non-string name reaches
        # the error path; message grammar fixed ("does not exist").
        raise Exception("WRONG TRANSFORMATION EXCEPTION : the transformation " + str(pretransformation_name) + " does not exist")
    return [transformed_trainset, transformation_scores_by_feature]
def transformations_comparaison_one_user(user_id):
    """Build and compare several matrix transformations for one user.

    Loads the user's data matrix and labels, registers a set of
    transformed views of it via the module-level ``add_transformation``
    helper, then runs ``compare`` against the "presence_count" baseline.
    State is shared with the rest of the module through globals.

    NOTE(review): relies on module-level ``time_dimention`` and
    ``do_laplace_smoothing`` being defined elsewhere — confirm.
    """
    # Results are accumulated in module globals (re-)initialized here.
    global file_name
    global rows_labels
    global labels_importance
    global labels_importance_derivative
    global labels_importance_rank
    global transformation_vectors
    labels_importance = {}
    labels_importance_derivative = {}
    labels_importance_rank = {}
    transformation_vectors = {}
    rows_labels = None
    # The None assignment is immediately overwritten below.
    file_name = None
    file_name = "transformations_comparaison_"+str(user_id)
    print "loading matrix user "+str(user_id)+"..."
    data_matrix = MDataExtractor.load_matrix(user_id)
    rows_labels = MDataExtractor.load_labels_vector(user_id)
    columns_labels = MDataExtractor.load_time_vector(user_id)
    importance_scores = MDataExtractor.load_importance_scores(user_id)
    # Raw counts and counts normalized to a percentage of the time axis size.
    add_transformation(data_matrix, "presence_count")
    add_transformation((data_matrix*100/(np.size(data_matrix,time_dimention)*1.0)), "presence_percentage")
    # NOTE(review): sibling functions unpack these transformations as
    # [matrix, scores]; here the un-unpacked return value is passed straight
    # to add_transformation — verify add_transformation expects the pair.
    idf_matrix = Numpy.idf_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    add_transformation(idf_matrix, "idf_score")
    '''idf2_matrix = Numpy.idf2_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing) add_transformation(idf2_matrix, "idf2_score")'''
    '''idf10_matrix = Numpy.idflog10_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing) add_transformation(idf10_matrix, "idflog10_score")'''
    idf3_matrix = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    add_transformation(idf3_matrix, "idf3_score")
    ldc_matrix = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)
    add_transformation(ldc_matrix, "ldc_score")
    idc_matrix = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    add_transformation(idc_matrix, "idc_score")
    # Compare every registered transformation against the raw-count baseline.
    compare("presence_count", user_id)
def compute_svd_one_user(user_id): file_name = "svd_user_"+str(user_id) print "loading matrix user "+str(user_id)+"..." data_matrix = MDataExtractor.load_matrix(user_id) rows_labels = MDataExtractor.load_labels_vector(user_id) columns_labels = MDataExtractor.load_time_vector(user_id) importance_scores = MDataExtractor.load_importance_scores(user_id) print "user "+str(user_id)+" has "+str(len(rows_labels))+" features (rows) and "+str(len(columns_labels))+" realization (columns)" #do the idf / or idc transformation before computing the SVD print "doing idf transformation for user "+str(user_id)+"..." document_transformed_matrix = np.copy(data_matrix) [document_transformed_matrix, scores] = Numpy.idf_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing) #[document_transformed_matrix, scores] = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing) #[document_transformed_matrix, scores] = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing) #[document_transformed_matrix, scores] = Numpy.ldc_matrix_transformation(data_matrix, time_dimention) term_transformed_matrix = np.ones(np.shape(data_matrix)) #term_transformed_matrix = Numpy.ti_matrix_transformation(data_matrix, importance_scores) #term_transformed_matrix = Numpy.nti_matrix_transformation(data_matrix, importance_scores) data_matrix = document_transformed_matrix * term_transformed_matrix #compute the SVD svd_comp = SVDComputer(data_matrix, rows_labels, columns_labels) print "computing SVD for user "+str(user_id)+"..." svd_comp.compute_svd() print "constructing interpretable output for user "+str(user_id)+"..." energy_captured = svd_comp.construct_rows_interpretable_output(disp_k, disp_m) r_output = svd_comp.rows_interpretable_output print "the energy captured with "+str(disp_k)+" concepts is "+str(energy_captured)+" %" #write the result print "writing SVD result for user "+str(user_id)+"..." JsonLogsFileWriter.write(r_output, file_name)
def compute_svd_one_user(user_id):
    """Compute an idf-weighted SVD of one user's matrix and write the
    interpretable output to a json log file.

    NOTE(review): this is a duplicate of an earlier ``compute_svd_one_user``
    definition in this module; being defined later, this one shadows it —
    confirm which copy is intended and remove the other.

    NOTE(review): relies on module-level ``time_dimention``,
    ``do_laplace_smoothing``, ``disp_k`` and ``disp_m`` — confirm.
    """
    file_name = "svd_user_" + str(user_id)
    print "loading matrix user " + str(user_id) + "..."
    data_matrix = MDataExtractor.load_matrix(user_id)
    rows_labels = MDataExtractor.load_labels_vector(user_id)
    columns_labels = MDataExtractor.load_time_vector(user_id)
    importance_scores = MDataExtractor.load_importance_scores(user_id)
    print "user " + str(user_id) + " has " + str(len(rows_labels)) + " features (rows) and " + str(len(columns_labels)) + " realization (columns)"
    #do the idf / or idc transformation before computing the SVD
    print "doing idf transformation for user " + str(user_id) + "..."
    document_transformed_matrix = np.copy(data_matrix)
    [document_transformed_matrix, scores] = Numpy.idf_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    #[document_transformed_matrix, scores] = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    #[document_transformed_matrix, scores] = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    #[document_transformed_matrix, scores] = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)
    # Uniform term weights (all ones); importance-based alternatives below.
    term_transformed_matrix = np.ones(np.shape(data_matrix))
    #term_transformed_matrix = Numpy.ti_matrix_transformation(data_matrix, importance_scores)
    #term_transformed_matrix = Numpy.nti_matrix_transformation(data_matrix, importance_scores)
    data_matrix = document_transformed_matrix * term_transformed_matrix
    #compute the SVD
    svd_comp = SVDComputer(data_matrix, rows_labels, columns_labels)
    print "computing SVD for user " + str(user_id) + "..."
    svd_comp.compute_svd()
    print "constructing interpretable output for user " + str(user_id) + "..."
    energy_captured = svd_comp.construct_rows_interpretable_output(disp_k, disp_m)
    r_output = svd_comp.rows_interpretable_output
    print "the energy captured with " + str(disp_k) + " concepts is " + str(energy_captured) + " %"
    #write the result
    print "writing SVD result for user " + str(user_id) + "..."
    JsonLogsFileWriter.write(r_output, file_name)