def main(likesJson, moviesJson):
    global moviesParser
    global likesMap
    channelIndex = ChannelIndex(Channels.CHANNELS)
    likesParser = LikesParser(channelIndex, likeThreshold=3)  # out of 5
    likesMap = likesParser.getUserDict(likesJson, build=True, count=None)
    creatingTrainingData(likesMap)
    moviesParser = MoviesParser(channelIndex)
    moviesParser.parseMoviesObjects(moviesJson, parse=True)
    userCount = len(likesMap)
    actorsCount = len(moviesParser.dictActor)
    directorsCount = len(moviesParser.dictDirector)
    genreCount = len(moviesParser.dictGenre)
    print "userCount-", userCount, ";actorsCount-", actorsCount, \
        ";directorsCount-", directorsCount, ";genreCount-", genreCount
    graph = Graph(userCount, actorsCount, directorsCount, genreCount)
    print "likesMap", likesMap[3]
    graph.calculateUserAffinity(moviesParser.dictDirector, moviesParser.dictActor,
                                moviesParser.dictGenre, likesMap)
    graph.calculateAffinityBetweenEntities()
    graph.calculateSelfAffinity()
    reco = Recommender(graph)
    movies = reco.recommend(likesParser.model, moviesParser, 3)
    print "Recommendations:"
    for m in movies:
        movieObj = moviesParser.movies[m]
        movieName = movieObj['name']
        print "ID:", m, "-", movieName
def test_get_missing_requirements():
    recommender = Recommender()
    lst = [0.2, 0.13, 0.04, 0.05, 0.049]
    preprocessed_requirements = ['r1', 'r2', 'r3', 'r4', 'r5']
    requirements = ['requirement1', 'requirement2', 'requirement3',
                    'requirement4', 'requirement5']
    n.assert_equal(recommender.get_missing_requirements(lst, preprocessed_requirements, requirements),
                   [('r3', 'requirement3'), ('r5', 'requirement5')])
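# The test above implies get_missing_requirements pairs each similarity score
# with its requirement and keeps the pairs whose score falls below a cutoff
# (0.04 and 0.049 are flagged while 0.05 is not, so the cutoff is evidently a
# strict `score < 0.05`). A minimal sketch consistent with the test, not the
# project's actual implementation; the name MISSING_THRESHOLD is hypothetical:
MISSING_THRESHOLD = 0.05

def get_missing_requirements(scores, preprocessed_requirements, requirements):
    # Keep (preprocessed, original) pairs whose similarity is below the cutoff.
    return [(pre, orig)
            for score, pre, orig in zip(scores, preprocessed_requirements, requirements)
            if score < MISSING_THRESHOLD]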
def main():
    '''
    INPUT: None
    OUTPUT: Recommendations sorted in order of relevance

    Uses the TextParser and Recommender classes to generate resource
    recommendations given a user's Quora data.
    '''
    read = TextParser()
    read.assemble_df()
    pickle.dump(read.df, open("data/master_df.pkl", "wb"))
    quora_user = open('data/quora_data.pkl', 'rb')  # binary mode for pickle
    quora = pickle.load(quora_user)
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    pickle.dump(clean_quora, open("data/clean_quora.pkl", "wb"))
    # Make recommendations
    rec = Recommender()
    test = rec.vectorize()
    top_ten_ind = rec.recommend()
    recs = read.df.ix[top_ten_ind]
    recs = recs.reset_index()
    recs['img_link'] = map(get_image, recs['title'])
    recs.loc[recs['type'] == 'course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    pickle.dump(recs[0:5], open("data/recs.pkl", "wb"))
    print "These are your recommendations: \n"
    print recs[['title', 'type', 'img_link']]
    return recs[['title', 'type', 'img_link']]
def test_get_top_courses():
    recommender = Recommender()
    lst = [0.30, 0.23, 0.34, 0.22]
    number = 2
    courses = ['c1', 'c2', 'c3', 'c4']
    course_names = ['course1', 'course2', 'course3', 'course4']
    n.assert_equal(recommender.get_top_courses(lst, number, courses, course_names),
                   [('c3', 'course3', 0.34), ('c1', 'course1', 0.30)])
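# The expected value above suggests get_top_courses ranks courses by score in
# descending order and returns the top `number` as (course, name, score)
# triples. A minimal sketch consistent with the test, not necessarily the
# project's actual implementation:
def get_top_courses(scores, number, courses, course_names):
    # Rank (course, name, score) triples by score, highest first.
    ranked = sorted(zip(courses, course_names, scores),
                    key=lambda triple: triple[2], reverse=True)
    return ranked[:number]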
def analyze_portfolio():
    uid = get_id_from_session(request.headers.get('session'))
    if not uid:
        return jsonify({"message": "Bad session :("}), 400
    p = Parser(
        list(transactions.find({'_id': uid}, {'transaction': 1, '_id': 0}))[0],
        list(holdings.find({'_id': uid}, {'holding': 1, '_id': 0}))[0],
        25)
    pv, direction = predict(p.get_neural_net_attrs(), train())
    r = Recommender(direction.tolist())
    recommendation = r.get_recommendation()
    return jsonify({
        'fitness': pv[0],
        'recommendation': recommendation[0],
        'link': recommendation[1]
    })
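# One way to exercise this endpoint from a client. The route path, host,
# HTTP method, and session token below are all placeholders (assumptions),
# since the route decorator is not shown in this snippet:
import requests

resp = requests.get('http://localhost:5000/analyze_portfolio',
                    headers={'session': '<session-token>'})
print(resp.json())  # expects keys: fitness, recommendation, link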
def get_recommendations(resume_file, requirement_file, ngram_range=(1, 1), use_tagger=False):
    with open(resume_file, 'r') as handle:
        resume = handle.read()
    with open(requirement_file, 'r') as handle:
        requirements = handle.read()
    recommender = Recommender(ngram_range=ngram_range, use_tagger=use_tagger)
    recommender.initialize_attributes(resume, requirements)
    recommender.vectorize_resume()
    recommender.vectorize_requirements()
    missing_requirements = recommender.find_missing_skills()
    print "Requirements:"
    print recommender.requirements
    print "preprocessed_requirements:"
    print recommender.preprocessed_requirements
    print "recommender.missing_requirements"
    print recommender.missing_requirements
    course_recommendations = recommender.recommend()
    return missing_requirements, course_recommendations
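# Example invocation, assuming plain-text resume and job-requirement files;
# the file names below are placeholders:
missing, courses = get_recommendations('resume.txt', 'requirements.txt',
                                       ngram_range=(1, 2), use_tagger=True)
for requirement, course in zip(missing, courses):
    print requirement, '->', course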
def recommend():
    user_data = str(request.form['user_input'].encode('utf-8'))
    # --- Drive to the given URL, scrape and generate recs --- #
    scraped = profile_crawl(user_data)
    quora = scraped['text']
    # Read and clean the Quora dump before recommending
    read = TextParser()
    read.df = master_df
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    pickle.dump(clean_quora, open("data/clean_quora.pkl", "wb"))
    rec = Recommender()
    test = rec.vectorize()
    top_ten_ind = rec.recommend()
    recs = read.df.ix[top_ten_ind]
    recs = recs.reset_index()
    recs['img_link'] = map(get_image, recs['title'])
    recs.loc[recs['type'] == 'course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    recs = recs[0:20]  # Keep the top twenty recommendations
    return render_template('testing.html', data=recs[['title', 'img_link']].values)
def article_page(aid):
    session = Sessions.get()
    like = False
    aid = int(aid)
    if session:
        user = session.user
        neo4jdb.add_read_articles(user.id, aid)
        like = neo4jdb.user_liked_article(user.id, aid)
        other_views = neo4jdb.users_also_viewed(aid, user.id)
    else:
        other_views = neo4jdb.users_also_viewed(aid)
    article = Articles.get_by_id(aid)
    similars = Recommender.find_similar(aid)
    other_views = db.get_title_by_aids(other_views)
    return render_template('detail.html', article=article, similar=similars,
                           like=like, others=other_views)
def main(likesJson, moviesJson):
    (moviesParser, likesParser, trainLikesMap, testLikesMap) = createParsers(likesJson, moviesJson)
    graph = createGraph(moviesParser, trainLikesMap)
    # Generate recommendations for the users in testLikesMap, then count how
    # many users have at least one recommended movie among their held-out likes.
    reco = Recommender(graph)
    recoMap = createRecommendations(likesParser, reco, testLikesMap.keys())
    accuracy = 0
    for user, movies2 in recoMap.items():
        liked = set(testLikesMap[user])
        hits = liked.intersection(set(movies2))
        if len(hits) > 0:
            accuracy += 1
    # Hit rate: fraction of test users with at least one hit, as a percentage.
    accuracy = accuracy / float(len(recoMap))
    accuracy = accuracy * 100
    print "Accuracy for the current test run is", accuracy
def recommend():
    resume = request.form.get('resume', None)
    requirements = request.form.get('requirements', None)
    # Initialize the recommender
    recommender = Recommender(ngram_range=(1, 1), use_tagger=True, use_stem=False)
    recommender.fit(resume, requirements)
    # Each requirement pair holds [0] the original requirement and
    # [1] the extracted (preprocessed) requirement
    missing_requirement_pairs = recommender.find_missing_skills()
    missing_requirements = [item[1] for item in missing_requirement_pairs]
    course_recommendations = recommender.recommend()
    if len(missing_requirements) > 0:
        return render_template('recommend.html',
                               data=zip(missing_requirements, course_recommendations))
    return render_template('matchall.html')
def test_not_empty_requirement1():
    recommender = Recommender()
    req = ''
    n.assert_equal(recommender.not_empty_requirement(req), False)
def test_remove_stopwords():
    recommender = Recommender()
    sentence = 'A passion for making sense of lots of unstructured data'
    n.assert_equal(recommender.remove_stopwords(sentence),
                   'A for making sense of lots of unstructured data')
def test_not_empty_requirement5():
    recommender = Recommender()
    req = ' A passion for making sense of lots of unstructured data '
    n.assert_equal(recommender.not_empty_requirement(req), True)
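# The two not_empty_requirement tests above pass for any implementation that
# treats a requirement as empty when it contains nothing but whitespace.
# A minimal sketch consistent with them, not necessarily the project's logic:
def not_empty_requirement(req):
    # True when the requirement has any non-whitespace content.
    return bool(req.strip())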
from recommend import NNRecommender
from recommend import Recommender

# nn = NNRecommender(5)
# outputs = nn.find_similar(5)
outputs = Recommender.find_similar(5)
print(outputs)
def lemma_search(query_str):
    ids = Recommender.lemma_search(query_str)
    results = db.get_title_by_aids(ids)
    return results
def ensembles_performance(k, d, topk=5):
    '''
    k: fold index
    d: latent dimensionality of the PMF model
    topk: size of the recommendation list
    '''
    print 'fold %d, dim %d, top %d list' % (k, d, topk)
    # Load the PMF models trained on this fold
    with open(MODELFOLDER + '/u-100k-fold-d%d-%d.out' % (d, k), 'rb') as f:
        pmf_list = load(f)
    # Returns the train, valid, test splits plus their user/item index sets
    train, trainU, trainI, valid, validU, validI, test, testU, testI = \
        fold_load('ml-100k', k)
    with open('ml-100k/fold%d-hidden.pickle' % k, 'rb') as f:
        hidden_v = load(f)
    with open('ml-100k/test-hidden.pickle', 'rb') as f:
        hidden_t = load(f)
    print 'loaded pmf_list'
    RS_list = []
    for mf_id, pmf in enumerate(pmf_list):
        RS_list.append(Recommender(item_MF=pmf.items))
    print 'RS_list created'
    n_items = RS_list[0].n_items
    with open(RESULTFOLDER + 'u-100k-fold-%d-d%d-top%d-results.out' % (d, k, topk), 'rb') as f:
        result = load(f)
    result = result[0:len(RS_list)]
    # Ensembles. If the order is changed, please adjust the ENSMEBLE_ORDER constant.
    E1 = Majority(RS_list, threshold=3)
    precisions = [line[2] for line in result]
    E1f = FilteredMajority(RS_list, performances=precisions, threshold=3)
    E2 = WeightedVote(RS_list, weights=precisions, threshold=3)
    best_RS = np.argmax(precisions)
    E3 = RS_list[best_RS]
    evalu_ensemble = [Evaluation(RS=E1, topk=topk),
                      Evaluation(RS=E1f, topk=topk),
                      Evaluation(RS=E2, topk=topk),
                      Evaluation(RS=E3, topk=topk)]
    print 'ensembles created'
    for e_id, evaluator in enumerate(evalu_ensemble):
        P, R = eval_users(test, hidden_t, evaluator, n_items)
        P = np.mean(P)
        R = np.mean(R)
        result.append([d, P, R])
        print '!!!concluded E', e_id, 'PR', P, R
    result[-1].insert(1, pmf_list[best_RS].lambdaa)
    print 'saving results'
    with open(RESULTFOLDER + 'u-100k-fold-%d-d%d-top%d-results.out' % (d, k, topk), 'wb') as f:
        dump(result, f)
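# For context, a minimal sketch of what a majority-vote ensemble over
# recommenders could look like: an item enters the recommendation list when
# at least `threshold` base recommenders score it above their rating cutoff.
# This is an illustration only, not the Majority class used above:
import numpy as np

def majority_vote(predicted_ratings, rating_cutoff=3.0, threshold=3, topk=5):
    # predicted_ratings: array of shape (n_recommenders, n_items)
    votes = (np.asarray(predicted_ratings) > rating_cutoff).sum(axis=0)
    candidates = np.where(votes >= threshold)[0]
    # Rank the surviving items by vote count, highest first
    return candidates[np.argsort(-votes[candidates])][:topk]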