def main(likesJson, moviesJson):
    """Build the user/actor/director/genre affinity graph and print movie
    recommendations.

    INPUT:  likesJson  - raw likes data consumed by LikesParser
            moviesJson - raw movie data consumed by MoviesParser
    OUTPUT: None (prints entity counts, debug info and the recommendation list)

    Side effects: rebinds the module-level globals `moviesParser` and
    `likesMap` so other code in this module can reuse the parsed data.
    """
    global moviesParser
    global likesMap

    channelIndex = ChannelIndex(Channels.CHANNELS)
    likesParser = LikesParser(channelIndex, likeThreshold=3)  # out of 5
    likesMap = likesParser.getUserDict(likesJson, build=True, count=None)
    creatingTrainingData(likesMap)

    moviesParser = MoviesParser(channelIndex)
    moviesParser.parseMoviesObjects(moviesJson, parse=True)

    userCount = len(likesMap)
    actorsCount = len(moviesParser.dictActor)
    directorsCount = len(moviesParser.dictDirector)
    genreCount = len(moviesParser.dictGenre)
    # Single pre-formatted string so the statement produces identical output
    # whether `print` is the Python 2 statement or the Python 3 function
    # (the original mixed both forms in this one body).
    print("userCount- %s ;actorsCount- %s ;directorsCount- %s ;genreCount- %s"
          % (userCount, actorsCount, directorsCount, genreCount))

    graph = Graph(userCount, actorsCount, directorsCount, genreCount)
    # Debug output hard-coded user id 3; the unguarded lookup raised
    # KeyError whenever that user was absent from the likes data.
    if 3 in likesMap:
        print("likesMap %s" % (likesMap[3],))

    graph.calculateUserAffinity(moviesParser.dictDirector,
                                moviesParser.dictActor,
                                moviesParser.dictGenre,
                                likesMap)
    graph.calculateAffinityBetweenEntities()
    graph.calculateSelfAffinity()

    reco = Recommender(graph)
    movies = reco.recommend(likesParser.model, moviesParser, 3)
    print("Recommendations: ")
    for m in movies:
        movieObj = moviesParser.movies[m]
        movieName = movieObj['name']
        print("ID: %s - %s" % (m, movieName))
def main():
    '''
    INPUT: None
    OUTPUT: Recommendations sorted in order of relevance

    Uses the TextParser and Recommender classes to generate resource
    recommendations given a user's Quora data.
    '''
    read = TextParser()
    read.assemble_df()
    # Context managers close (and flush) each pickle handle deterministically;
    # the original `pickle.dump(obj, open(...))` pattern leaked the handles.
    with open("data/master_df.pkl", "wb") as handle:
        pickle.dump(read.df, handle)

    # 'rb' is required for reading pickles on Python 3 (and on Windows under
    # Python 2); the original opened in text mode and never closed the file.
    with open('data/quora_data.pkl', 'rb') as quora_user:
        quora = pickle.load(quora_user)

    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    with open("data/clean_quora.pkl", "wb") as handle:
        pickle.dump(clean_quora, handle)

    # Make recommendations
    rec = Recommender()
    test = rec.vectorize()
    top_ten_ind = rec.recommend()
    # NOTE(review): DataFrame.ix is deprecated/removed in modern pandas;
    # migrate to .loc/.iloc once the index type of top_ten_ind is confirmed.
    recs = read.df.ix[top_ten_ind]
    recs = recs.reset_index()
    recs['img_link'] = map(get_image, recs['title'])
    recs.loc[recs['type'] == 'course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    with open("data/recs.pkl", "wb") as handle:
        pickle.dump(recs[0:5], handle)

    # Single-argument print() behaves identically under Python 2 and 3.
    print("These are your recommendations: \n")
    print(recs[['title', 'type', 'img_link']])
    return recs[['title', 'type', 'img_link']]
def recommend():
    """Flask view: compare the posted resume against the posted job
    requirements and render course recommendations for any missing skills.
    Falls back to the all-matched page when nothing is missing."""
    form = request.form
    resume_text = form.get('resume', None)
    requirement_text = form.get('requirements', None)

    # Initialize recommender and fit it on the two submitted documents.
    engine = Recommender(ngram_range=(1, 1), use_tagger=True, use_stem=False)
    engine.fit(resume_text, requirement_text)

    # find_missing_skills() yields pairs of
    # (original requirement, extracted requirement); keep the extracted form.
    extracted = [pair[1] for pair in engine.find_missing_skills()]
    courses = engine.recommend()

    if not extracted:
        return render_template('matchall.html')
    return render_template('recommend.html', data=zip(extracted, courses))
def get_recommendations(resume_file, requirement_file, ngram_range=(1, 1), use_tagger=False):
    """Read a resume file and a requirements file, then return the pair
    (missing requirements, course recommendations).

    INPUT:  resume_file      - path to a plain-text resume
            requirement_file - path to a plain-text job-requirements document
            ngram_range      - n-gram span forwarded to Recommender
            use_tagger       - whether Recommender should run its POS tagger
    OUTPUT: (missing_requirements, course_recommendations)
    """
    with open(resume_file, 'r') as handle:
        resume = handle.read()
    with open(requirement_file, 'r') as handle:
        requirements = handle.read()

    recommender = Recommender(ngram_range=ngram_range, use_tagger=use_tagger)
    recommender.initialize_attributes(resume, requirements)
    recommender.vectorize_resume()
    recommender.vectorize_requirements()
    missing_requirements = recommender.find_missing_skills()

    # Debug output, written as single-argument print() calls so each line
    # produces identical output under Python 2 (as a parenthesized print
    # statement) and Python 3 (the original used py2-only multi-item prints,
    # inconsistent with print() calls elsewhere in this file).
    print("Requirements:")
    print(recommender.requirements)
    print("preprocessed_requirements:")
    print(recommender.preprocessed_requirements)
    print("recommender.missing_requirements")
    print(recommender.missing_requirements)

    course_recommendations = recommender.recommend()
    return missing_requirements, course_recommendations
def recommend():
    """Flask view: scrape the Quora profile named in the submitted form,
    generate recommendations from the scraped text, and render the top
    twenty as a page of titles and images."""
    user_data = str(request.form['user_input'].encode('utf-8'))

    # --- Drive to the given URL, scrape and generate recs -- #
    scraped = profile_crawl(user_data)
    quora = scraped['text']

    # Read and clean Quora dump recommendations
    read = TextParser()
    read.df = master_df
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    # Close the pickle handle deterministically; the original
    # `pickle.dump(obj, open(...))` pattern leaked it.
    with open("data/clean_quora.pkl", "wb") as handle:
        pickle.dump(clean_quora, handle)

    rec = Recommender()
    test = rec.vectorize()
    top_ten_ind = rec.recommend()
    # NOTE(review): DataFrame.ix is deprecated/removed in modern pandas;
    # migrate to .loc/.iloc once the index type of top_ten_ind is confirmed.
    recs = read.df.ix[top_ten_ind]
    recs = recs.reset_index()
    recs['img_link'] = map(get_image, recs['title'])
    recs.loc[recs['type'] == 'course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'

    recs = recs[0:20]  # Get the top twenty recommendations
    return render_template('testing.html', data=recs[['title', 'img_link']].values)