def get_recommendations(keywords, top_n=5):
    """Return the best-matching cities for ``keywords`` as a JSON string.

    Every row of ``richCityData.csv`` is scored by passing its
    ``description`` and ``keywords`` to
    ``CosineSimilarity.cosine_similarity_of``. Rows are ranked by descending
    score; rows with equal scores keep their file order.

    Args:
        keywords: Query text handed to the similarity scorer.
        top_n: Maximum number of cities to return (defaults to 5).

    Returns:
        A JSON array (as ``str``) of records with the keys ``city``,
        ``popularity``, ``description`` and ``image``, best match first.
    """
    df = pd.read_csv('richCityData.csv')

    scores = [
        CosineSimilarity.cosine_similarity_of(description, keywords)
        for description in df['description']
    ]

    # sorted() is stable, so ties stay in file order even with reverse=True.
    ranked = sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True)
    top_positions = [position for position, _ in ranked[:max(top_n, 0)]]

    # Select the winning rows with a single positional lookup instead of
    # growing a frame row by row with DataFrame.append (removed in pandas 2.0).
    columns = ['city', 'popularity', 'description', 'image']
    top_rows = df.iloc[top_positions][columns]

    return json.dumps(top_rows.to_dict('records'))
def test_cosine_similarity_same(self):
    """Two identical texts must format to a similarity of "1.00"."""
    first = "happy birthday"
    second = "happy birthday"
    similarity = CosineSimilarity.cosine_similarity_of(first, second)
    # Compare the two-decimal rendering rather than the raw float so that
    # tiny floating-point errors do not fail the test.
    rendered = "%.2f" % similarity
    self.assertEqual(rendered, "1.00")
def test_cosine_similarity_different(self):
    """Texts sharing no words must format to a similarity of "0.00"."""
    first = "hello sir"
    second = "good afternoon"
    similarity = CosineSimilarity.cosine_similarity_of(first, second)
    # Compare the two-decimal rendering rather than the raw float so that
    # tiny floating-point errors do not fail the test.
    rendered = "%.2f" % similarity
    self.assertEqual(rendered, "0.00")
def get_recommendations_include_rating_count_threshold_positive_negative_reviews(
        keywords, top_n=5):
    """Return the top cities for ``keywords``, weighting in rating data.

    For every row of ``city_data_cleared.csv`` the description/keyword
    similarity from ``CosineSimilarity.cosine_similarity_of`` is combined with
    the value of ``RatingExtractor.get_rating_weight_with_count_and_reviews``
    (fed the row's ``rating``, ``rating_count``, ``positive_review`` and
    ``negative_review``) through ``RecommenderEngine.calculate_final_score``.
    Rows are ranked by descending final score; ties keep their file order.

    Args:
        keywords: Query text handed to the similarity scorer.
        top_n: Maximum number of cities to return (defaults to 5).

    Returns:
        A JSON array (as ``str``) of records with the keys ``city``,
        ``popularity``, ``description`` and ``score``, best match first.
    """
    df = pd.read_csv('city_data_cleared.csv')

    scores = []
    for _, row in df.iterrows():
        cs_score = CosineSimilarity.cosine_similarity_of(
            row['description'], keywords)
        rating_contribution = (
            RatingExtractor.get_rating_weight_with_count_and_reviews(
                row['rating'], row['rating_count'],
                row['positive_review'], row['negative_review']))
        scores.append(RecommenderEngine.calculate_final_score(
            cs_score, rating_contribution))

    # sorted() is stable, so ties stay in file order even with reverse=True.
    ranked = sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True)
    best = ranked[:max(top_n, 0)]

    # Select the winning rows with a single positional lookup instead of
    # growing a frame row by row with DataFrame.append (removed in pandas 2.0).
    top_rows = df.iloc[[position for position, _ in best]][
        ['city', 'popularity', 'description']].copy()
    top_rows['score'] = [score for _, score in best]

    return json.dumps(top_rows.to_dict('records'))
def get_recommendations(resume, jobs_df, top_n=10):
    """Rank job postings against a resume and return the best matches.

    Each row's ``description_cleaned`` text is scored against ``resume`` with
    ``CosineSimilarity.cosine_similarity_of``. Rows are ranked by descending
    score; rows with equal scores keep their original order.

    Args:
        resume: Resume text handed to the similarity scorer.
        jobs_df: DataFrame providing the columns ``description_cleaned``,
            ``job_description``, ``title``, ``company_name``, ``location``
            and ``Unnamed: 0``. Any index is accepted; rows are addressed by
            position.
        top_n: Maximum number of postings to return (defaults to 10).

    Returns:
        A DataFrame with a fresh 0..n-1 index and the columns ``Job Index``,
        ``Company``, ``Title``, ``Location``, ``Description`` and
        ``Job Description``, best match first. Missing values are replaced by
        empty strings; ``Job Description`` is never populated and is therefore
        always the empty string.
    """
    scores = [
        CosineSimilarity.cosine_similarity_of(description, resume)
        for description in jobs_df['description_cleaned']
    ]

    # Rank by position rather than by index label so that frames with a
    # non-default index (filtered, shuffled, ...) are handled correctly.
    # sorted() is stable, so ties keep their original order.
    ranked = sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True)
    top_positions = [position for position, _ in ranked[:max(top_n, 0)]]

    # Build the result in one step; DataFrame.append was removed in pandas 2.0.
    source_to_result = {
        'Unnamed: 0': 'Job Index',
        'company_name': 'Company',
        'title': 'Title',
        'location': 'Location',
        'job_description': 'Description',
    }
    result_df = (
        jobs_df.iloc[top_positions][list(source_to_result)]
        .rename(columns=source_to_result)
        .reset_index(drop=True)
    )
    result_df['Job Description'] = ''

    return result_df.fillna('')
def get_rating_recommendations(keywords, top_n=5):
    """Return the top cities for ``keywords``, weighting in rating data.

    For every row of ``ratingRichCityData.csv`` the description/keyword
    similarity from ``CosineSimilarity.cosine_similarity_of`` is combined with
    the value of ``RatingExtractor.get_rating_with_count_and_reviews`` (fed
    the row's ``rating``, ``rating_count``, ``positive_review`` and
    ``negative_review``) through ``RecommenderEngine.calculate_score_from``.
    Rows are ranked by descending score; ties keep their file order.

    Args:
        keywords: Query text handed to the similarity scorer.
        top_n: Maximum number of cities to return (defaults to 5).

    Returns:
        A JSON array (as ``str``) of records with the keys ``city``,
        ``popularity``, ``description`` and ``image``, best match first.
    """
    df = pd.read_csv('ratingRichCityData.csv')

    scores = []
    for _, row in df.iterrows():
        cs = CosineSimilarity.cosine_similarity_of(row['description'], keywords)
        rat_value = RatingExtractor.get_rating_with_count_and_reviews(
            row['rating'], row['rating_count'],
            row['positive_review'], row['negative_review'])
        scores.append(RecommenderEngine.calculate_score_from(cs, rat_value))

    # sorted() is stable, so ties stay in file order even with reverse=True.
    ranked = sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True)
    top_positions = [position for position, _ in ranked[:max(top_n, 0)]]

    # Select the winning rows with a single positional lookup instead of
    # growing a frame row by row with DataFrame.append (removed in pandas 2.0).
    columns = ['city', 'popularity', 'description', 'image']
    top_rows = df.iloc[top_positions][columns]

    return json.dumps(top_rows.to_dict('records'))
def test_cosine_similarity_some(self):
    """Texts sharing one of three words must format to "0.33"."""
    first = "apple banana orange"
    second = "orange berry ananas"
    similarity = CosineSimilarity.cosine_similarity_of(first, second)
    # Compare the two-decimal rendering rather than the raw float so that
    # tiny floating-point errors do not fail the test.
    rendered = "%.2f" % similarity
    self.assertEqual(rendered, "0.33")