def cluster_users(clustering_tool, reader: DataReader, clustered_users: List[User], clusters_number: int,
                  lists_count: int) -> Clusterings:
    """
    Build a separate clustering of users for each list number 0 .. lists_count - 1.

    The users come from the reader object. ``clustered_users`` is forwarded to the
    matrix-building and conversion helpers so that those users are left out of the
    produced clusters.

    :param clustering_tool: object exposing ``fit_predict`` (e.g. a scikit-learn clustering estimator)
    :param reader: DataReader object that supplies the users
    :param clustered_users: users that must not appear in the resulting clusterings
    :param clusters_number: expected number of clusters, passed to ``Clusterings``
    :param lists_count: number of lists to cluster by; list numbers start at 0
    :return: Clusterings object holding one clustering per list number
    """
    result = Clusterings(clusters_number)
    users = reader.get_all_users()

    for list_index in range(lists_count):
        # Feature matrix for this list, built only from users not yet clustered.
        matrix = get_matrix_by_list_for_not_clustered_users(users, list_index, clustered_users)
        labels = clustering_tool.fit_predict(matrix)

        # Turn the flat label list back into sets of users before storing it.
        users_sets = convert_clusters_list_to_users_sets(reader, labels, clustered_users)
        result.add_clustering_for_list(users_sets, list_index)

    return result
from experiments.experiment4.preferences_clustering import PreferencesClustering
from utils.data_reader import DataReader
from utils.json_serializer import Serializer
from utils.metrics import TeamDesiresMetric, ClusteringMetric

__author__ = 'Xomak'

# Load the users and cluster them by preferences.
# NOTE(review): the second argument (6) is presumably the number of clusters - confirm
# against PreferencesClustering.
reader = DataReader("../data/ms-sne.json")
pc = PreferencesClustering(reader.get_all_users(), 6)
result = pc.clusterize()

# For every resulting set print its members (comma-separated) and its desires metric.
for current_set in result:
    print(','.join(map(str, current_set)))
    print(TeamDesiresMetric(current_set))

# Overall quality of the clustering, then dump the result for the web visualiser.
print(ClusteringMetric(result).get_average_desires_metric())
Serializer.serialize_to_file(result, "../web-visualiser/data.json")
# Parameters shared by every clustering run below.
desires_weight = 1
need_balance = True
metric_type = MetricTypes.DESIRES

# Data sets the experiment iterates over.
data_files_names = [
    "../data/ms-sne_names.json",
    "../data/eltech-vector.json",
    "../data/users.json"
]

# Receives one metric value per (data file, team count) pair through add_metric_for.
results = ResultHolder(data_files_names)

for data_file_name in data_files_names:
    reader = DataReader(data_file_name)
    # Upper bound for the team count: half the number of users, truncated to an int.
    max_teams = int(len(reader.get_all_users()) / 2)
    for teams in range(2, max_teams + 1):
        print("\n\nTEAMS: %d\n" % teams)

        # Spectral
        clustering_alg = UsersSpectralClustering(reader, teams,
                                                 desires_weight=desires_weight,
                                                 need_balance=need_balance)
        sets = clustering_alg.cluster()
        # Store the final metric of this clustering under the current file and team count.
        results.add_metric_for(
            data_file_name, teams,
            ClusteringMetric(sets, metric_type).get_final_metric())

        # Agglomerative