def update_cluster_user_rec_item(self, cluster_user_id_arr):
    """Accumulate recommendation weights for every user in one cluster.

    For each user in the cluster, every item that other cluster members
    interacted with but this user did not is added to the user's entry in
    self.all_user_rec_item_dict, weighted by how many interactions the item
    received inside the cluster.

    Parameters:
        cluster_user_id_arr: iterable of user ids for one cluster; also used
            as a row selector into the data sparse matrix.

    Side effects:
        Mutates self.all_user_rec_item_dict in place.
    """
    data_smat = get_data_smat()
    all_user_rec_item_dict = self.all_user_rec_item_dict

    # Restrict the interaction matrix to this cluster's rows.
    cluster_user_data_smat = data_smat[cluster_user_id_arr]
    user_id_arr, item_id_arr, _ = find_nonzero_indices(cluster_user_data_smat)

    # NOTE(review): find_nonzero_indices on the row-sliced matrix presumably
    # yields LOCAL row indices (0..k-1), yet they are used below as keys of
    # cluster_user_dict, which is keyed by the ORIGINAL user ids.  This only
    # lines up if cluster_user_id_arr is itself 0..k-1 (or the helper maps
    # rows back to user ids) — verify against the caller.

    # Per-user set of items the user already interacted with.
    cluster_user_dict = {}
    for user_id in cluster_user_id_arr:
        cluster_user_dict[user_id] = set()

    item_set = set(item_id_arr)

    # Count in-cluster interactions per item and record each user's own
    # items.  dict.get replaces the original's per-item try/except KeyError:
    # same result, no exception raised on every first occurrence.
    item_count_dict = {}
    for user_id, item_id in zip(user_id_arr, item_id_arr):
        item_count_dict[item_id] = item_count_dict.get(item_id, 0) + 1
        cluster_user_dict[user_id].add(item_id)

    # Add the cluster-popularity weight of every unseen item to each user's
    # recommendation dict.
    for user_id in cluster_user_id_arr:
        user_new_item_set = item_set - cluster_user_dict[user_id]
        user_rec_item_dict = all_user_rec_item_dict[user_id]
        for item_id in user_new_item_set:
            weight = item_count_dict[item_id]
            user_rec_item_dict[item_id] = user_rec_item_dict.get(item_id, 0) + weight
def gen_toy_dataset(n_user, n_item):
    """Generate a random toy user-item interaction dataset.

    Each of the n_user * n_item cells is 1 with probability 0.3 and 0 with
    probability 0.7 (numpy-style random.choice with an explicit p vector).

    Parameters:
        n_user: number of users (rows).
        n_item: number of items (columns).

    Returns:
        Zipped (user_index, item_index) pairs, one per nonzero cell.
    """
    # Draw a dense 0/1 matrix, ~30% ones, then sparsify it.
    # (Commented-out debug prints from the original were removed.)
    mat = random.choice(2, n_user * n_item, p=[0.7, 0.3]).reshape((n_user, n_item))
    smat = csr_matrix(mat)
    row_index_arr, column_index_arr, _ = find_nonzero_indices(smat)
    relationships = zip(row_index_arr, column_index_arr)
    return relationships
def gen_debug_dataset(): data = [ [1,1,1,1,0,0,0,0,0,0], [1,1,1,0,0,1,0,0,0,0], [1,0,0,1,0,1,1,0,0,0], [1,1,0,1,1,0,0,1,0,0], [0,0,1,1,1,1,0,0,0,0], [0,0,0,1,1,0,0,1,1,0], [0,0,0,1,0,1,1,1,0,1], [0,0,0,0,1,0,1,1,1,1], [0,0,1,0,0,0,1,0,1,1], [0,0,0,1,0,1,1,0,1,1], ] n_user = len(data) n_item = len(data[0]) print n_user, n_item smat = csr_matrix(data, shape=(n_user, n_item), dtype=UINT) row_index_arr,column_index_arr,_ = find_nonzero_indices(smat) relationships = zip(row_index_arr, column_index_arr) return n_user, n_item, relationships
def op_sparse_matrix(): mat = random.randint(0,2,size=(10,10)) print mat smat = csr_matrix(mat) indices = find_nonzero_indices(smat)[1]