def user_rating_clustering():
    """Cluster user movie-rating vectors with MeanShift and print the labels.

    Reads the raw rating matrix via the project's data helper, fits a
    MeanShift model (bandwidth estimated automatically), and prints the
    per-sample cluster assignments. Returns None.
    """
    rating_data = data_helpers.read_feature_data(
        file_path='../data/user_movie_rating')
    model = MeanShift()
    labels = model.fit_predict(rating_data)
    centers = model.cluster_centers_  # kept for parity with the original; unused
    print(labels)
# Example #2
# 0
def customer_clustering():
    """Cluster customer feature vectors into 4 groups with KMeans and
    estimate accuracy against the file's implicit ground truth.

    The input file is assumed to hold 4 ground-truth groups of 200
    consecutive samples each. Accuracy is computed by assigning every
    ground-truth group to its majority predicted cluster.

    Returns:
        tuple: (data, predict_labels) — the raw feature data and the
        per-sample cluster labels from KMeans.
    """
    data = data_helpers.read_feature_data(file_path='../data/customer_data')
    n_groups = 4
    group_size = 200  # samples per ground-truth group in the source file
    kmean_model = KMeans(n_clusters=n_groups)
    predict_labels = kmean_model.fit_predict(data)
    print(predict_labels)
    # Rows = ground-truth group, columns = predicted cluster label.
    predict_label_count_matrix = [[0] * n_groups for _ in range(n_groups)]
    for id_, label_ in enumerate(predict_labels):
        # Samples beyond the expected range fall into the last group,
        # matching the original else-branch behavior.
        group = min(id_ // group_size, n_groups - 1)
        predict_label_count_matrix[group][label_] += 1
    correct_guess_num = sum(max(row) for row in predict_label_count_matrix)
    # Fix: derive the total from the actual sample count instead of the
    # hard-coded 800, so accuracy stays correct if the file size changes.
    total_guess_num = len(predict_labels)
    accuracy = float(correct_guess_num) / float(total_guess_num)
    print('accuracy:' + str(accuracy))
    return data, predict_labels
def customer_clustering():
    """Cluster the reduced customer data set with AffinityPropagation.

    Fits an AffinityPropagation model (up to 1000 iterations, declaring
    convergence after 150 stable iterations), prints the per-sample
    labels, and returns the data together with those labels.

    Returns:
        tuple: (data, labels)
    """
    samples = data_helpers.read_feature_data(
        file_path='../data/customer_data_min')
    model = AffinityPropagation(convergence_iter=150, max_iter=1000)
    model.fit(samples)
    center_ids = model.cluster_centers_indices_  # unused, kept for parity
    cluster_labels = model.labels_
    print(cluster_labels)
    return samples, cluster_labels
def load_train_data(file_path='../data/customer_salary_satisfaction'):
    """Split raw samples into feature vectors and supervision targets.

    Each sample's last column is the target; every column before it is a
    feature.

    Args:
        file_path (str): Path to the raw data file, read through the
            project's data helper.

    Returns:
        tuple: (feature_data, target_data) — a list of per-sample feature
        lists and the parallel list of target values.
    """
    train_data = data_helpers.read_feature_data(file_path)
    feature_dim = len(train_data[0]) - 1
    # Idiomatic slicing replaces the manual per-index copy loop; list()
    # guarantees each feature vector is a plain list, as before.
    feature_data = [list(sample[:feature_dim]) for sample in train_data]
    target_data = [sample[feature_dim] for sample in train_data]
    return feature_data, target_data
def load_train_data(file_path='../data/customer_off_time_salary_satisfaction'):
    """Split raw samples into feature vectors and supervision targets.

    Each sample's last column is the target; every column before it is a
    feature.

    Args:
        file_path (str): Path to the raw data file, read through the
            project's data helper.

    Returns:
        tuple: (feature_data, target_data) — a list of per-sample feature
        lists and the parallel list of target values.
    """
    train_data = data_helpers.read_feature_data(file_path)
    feature_dim = len(train_data[0]) - 1
    # Idiomatic slicing replaces the manual per-index copy loop; list()
    # guarantees each feature vector is a plain list, as before.
    feature_data = [list(sample[:feature_dim]) for sample in train_data]
    target_data = [sample[feature_dim] for sample in train_data]
    return feature_data, target_data
def customer_clustering():
    """Cluster the reduced customer data set with AffinityPropagation.

    Fits an AffinityPropagation model on the 2-D feature array (max 1000
    iterations, convergence declared after 150 stable iterations), prints
    the per-sample cluster labels, and returns the data with those labels.

    Returns:
        tuple: (data, labels)
    """
    samples = data_helpers.read_feature_data(file_path='./data/customer_data_min')
    model = AffinityPropagation(convergence_iter=150, max_iter=1000)
    model.fit(samples)  # train model
    center_ids = model.cluster_centers_indices_  # unused, kept for parity
    cluster_labels = model.labels_
    print(cluster_labels)
    return samples, cluster_labels
def load_train_data(file_path, label_pos=-1):
    """Split raw samples into features and targets with the label column
    at an arbitrary position.

    The original implementation only handled ``label_pos`` values 0 and
    -1 correctly; any other position left the label inside the feature
    vector and silently dropped the last column. This version excludes
    the label column wherever it sits, and remains backward-compatible
    for 0 and -1.

    Args:
        file_path (str): Path to the raw data file, read through the
            project's data helper.
        label_pos (int): Index of the label column. Negative indices are
            supported; defaults to the last column.

    Returns:
        tuple: (feature_data, target_data) — per-sample feature lists
        (label column removed) and the parallel list of label values.
    """
    train_data = data_helpers.read_feature_data(file_path)
    n_columns = len(train_data[0])
    # Normalize to a non-negative column index so negative positions
    # (e.g. the -1 default) select the right column to exclude.
    label_col = label_pos % n_columns
    feature_data = []
    target_data = []
    for data_sample in train_data:
        feature_vec = [val for id_, val in enumerate(data_sample)
                       if id_ != label_col]
        feature_data.append(feature_vec)
        target_data.append(data_sample[label_pos])
    return feature_data, target_data