# Keys of the noun-count result for the candidate item's description
# (presumably its extracted keywords -- confirm against NL_processor).
inner_ht_list = list(
    NL_processor(x[j].Description).noun_count_result.keys()
)

# Shared vocabulary: every distinct entry that appears in either list.
dictionary_set = list(set([*ht_list, *inner_ht_list]))

# Encode both lists as 0/1 vectors over the shared vocabulary and compare.
IR_Vector_inner = make_user_interest_vector(inner_ht_list, dictionary_set)
IR_Vector_outer = make_user_interest_vector(ht_list, dictionary_set)
sim = cosine_similarity(IR_Vector_inner, IR_Vector_outer)

# inner_sim_best3 holds [similarity, item] pairs and is re-sorted in
# ascending order of similarity after every change.
if len(inner_sim_best3) != recommend_size:
    # List length differs from recommend_size: simply add this candidate.
    inner_sim_best3.append([sim, x[j]])
    inner_sim_best3.sort(key=lambda element: element[0])
else:
    # List is at recommend_size: overwrite the first entry whose score is
    # lower than this candidate's, then restore the ordering.
    for k in range(recommend_size):
        if inner_sim_best3[k][0] < sim:
            inner_sim_best3[k] = [sim, x[j]]
            inner_sim_best3.sort(key=lambda element: element[0])
            break
######################################################################################################################## # more researching............................................................................. # map : 지정된 리스트나 튜플을 지정된 함수로 처리하는 함수. # 사용 : map([함수명], [데이터]) ######################################################################################################################## # user_interest_matrix = map(make_user_interest_vector, users_interests) # # for i in user_interest_matrix: # print(i, type(i)) user_interest_matrix = [] for i in range(len(users_interests)): user_interest_matrix.append(make_user_interest_vector(users_interests[i])) user_similarities = [[cosine_similarity(interest_vector_i, interest_vector_j) for interest_vector_j in user_interest_matrix] for interest_vector_i in user_interest_matrix] # for i in user_similarities: # print(i) # print(user_similarities[13][0]) # sorted 함수 찾아볼 것!!!!! # 사용자의 관심사를 모든 사용자와의 유사도를 구하여, 튜플리스트로 리턴 def most_similar_users_to(user_id): pairs = [(other_user_id, similarity) for other_user_id, similarity in enumerate(user_similarities[user_id])
print(sample_dict, "\n", list1, "\n", list2, "\n")


def make_user_interest_vector(user_interests, sample_dict):
    """Encode ``user_interests`` as a 0/1 vector aligned with ``sample_dict``.

    Element ``i`` of the result is 1 when the ``i``-th entry of
    ``sample_dict`` occurs in ``user_interests`` and 0 otherwise, so the
    result always has one element per entry of ``sample_dict``.
    """
    return [int(entry in user_interests) for entry in sample_dict]


# Build the two vectors and show them.
interestV_1 = make_user_interest_vector(list1, sample_dict)
print(interestV_1, type(interestV_1))
interestV_2 = make_user_interest_vector(list2, sample_dict)
print(interestV_2)

# Baseline similarity of the two vectors.
print(interestV_1, type(interestV_1))
print(interestV_2)
sim = cosine_similarity(interestV_1, interestV_2)
print(sim)

# Append 100 matching 1-components to both vectors and compare again.
for i in range(100):
    interestV_1.append(1)
    interestV_2.append(1)
print(interestV_1, type(interestV_1))
print(interestV_2)
sim = cosine_similarity(interestV_1, interestV_2)
print(sim)

# Append 100 zero components to the first vector.
for i in range(100):
    interestV_1.append(0)
def make_recommend_list(added_Album):
    """Score a newly added album against the stored albums, batch by batch.

    For every album returned by ``Album_VO.query`` a similarity score is
    computed:

    * 0.3 is added when both albums have the same ``Singer_ID``;
    * when both albums have a description, the cosine similarity of their
      0/1 keyword vectors (built from the keys of
      ``NL_processor(...).noun_count_result``) is added.

    The score is then stored under ``added_Album.Album_ID`` in the existing
    album's recommendation record (``Album_recommend_VO``), which is created
    in memory when the query finds none.

    Args:
        added_Album: the new album; ``Album_ID``, ``Singer_ID`` and
            ``Description`` are read from it.

    NOTE(review): nothing visible here adds the records to a session or
    commits them -- confirm that the caller (or later code) persists them.
    """
    batchsize = 200
    offset = 0

    # Recommendation record for the new album itself.
    # NOTE(review): not used in the visible part of this function.
    added_Album_rec = Album_recommend_VO()
    added_Album_rec.Album_ID = added_Album.Album_ID

    # Only run the noun extraction when there is a description to analyse;
    # the keyword list is needed solely for the description comparison below.
    added_has_description = added_Album.Description is not None
    added_Album_dict = {}
    if added_has_description:
        added_Album_dict = NL_processor(
            added_Album.Description).noun_count_result
    added_Album_dict_list = list(added_Album_dict.keys())

    existing_Album_list = Album_VO.query.offset(offset).limit(batchsize).all()

    # Keep fetching batches until a query comes back empty.
    while existing_Album_list:
        for existing_Album in existing_Album_list:
            sim = 0

            # Same singer contributes a fixed bonus.
            if existing_Album.Singer_ID == added_Album.Singer_ID:
                sim += 0.3

            existing_Album_rec = Album_recommend_VO.query.filter(
                Album_recommend_VO.Album_ID == existing_Album.Album_ID).first()
            if existing_Album_rec is None:
                existing_Album_rec = Album_recommend_VO()
                existing_Album_rec.Album_ID = existing_Album.Album_ID

            # Compare descriptions only when both albums have one; this also
            # avoids building vectors that could not contribute to the score.
            if existing_Album.Description is not None and added_has_description:
                existing_Album_dict = NL_processor(
                    existing_Album.Description).noun_count_result
                exsisting_Album_dict_list = list(existing_Album_dict.keys())

                print(added_Album_dict_list)
                print(exsisting_Album_dict_list)

                # dictionary_set: the distinct keywords of both albums.
                dictionary_set = list(
                    set(added_Album_dict_list + exsisting_Album_dict_list))
                print("dict_set : ", dictionary_set)

                added_IR_Vector = make_user_interest_vector(
                    added_Album_dict_list, dictionary_set)
                existing_IR_Vector = make_user_interest_vector(
                    exsisting_Album_dict_list, dictionary_set)
                print(added_IR_Vector, type(added_IR_Vector))
                print(existing_IR_Vector, type(existing_IR_Vector))

                sim += cosine_similarity(added_IR_Vector, existing_IR_Vector)

            print(existing_Album_rec.as_values())

            existing_Album_rec_dict = existing_Album_rec.as_dict()
            if existing_Album_rec_dict is None:
                existing_Album_rec.set_dict({added_Album.Album_ID: sim})
            else:
                # Previously ``update()`` was called without arguments, so the
                # new score was silently dropped for records that already had
                # a dict. Store it and write the dict back.
                # NOTE(review): assumes set_dict accepts the full mapping, as
                # in the branch above -- confirm against Album_recommend_VO.
                existing_Album_rec_dict.update({added_Album.Album_ID: sim})
                existing_Album_rec.set_dict(existing_Album_rec_dict)
            print(existing_Album_rec.as_dict())

        # Advance to the next batch.
        offset += batchsize
        existing_Album_list = Album_VO.query.offset(offset).limit(
            batchsize).all()