def main(likesJson, moviesJson):
    global moviesParser
    global likesMap
    channelIndex = ChannelIndex(Channels.CHANNELS)
    likesParser = LikesParser(channelIndex, likeThreshold=3)  # out of 5
    likesMap = likesParser.getUserDict(likesJson, build = True, count = None)
    
    creatingTrainingData(likesMap)
    
    moviesParser = MoviesParser(channelIndex)
    moviesParser.parseMoviesObjects(moviesJson, parse = True)

    userCount = len(likesMap)
    actorsCount = len(moviesParser.dictActor)
    directorsCount = len(moviesParser.dictDirector)
    genreCount = len(moviesParser.dictGenre)
    print "userCount-", userCount, ";actorsCount-", actorsCount, ";directorsCount-", directorsCount, ";genreCount-", genreCount
    # print("%d,%d,%d,%d" % (userCount, actorsCount, directorsCount, genreCount))
    graph = Graph(userCount, actorsCount, directorsCount, genreCount)
    print "likesMap", likesMap[3]
    graph.calculateUserAffinity(moviesParser.dictDirector, moviesParser.dictActor, moviesParser.dictGenre, likesMap)
    graph.calculateAffinityBetweenEntities()
    graph.calculateSelfAffinity()

    reco = Recommender(graph)
    movies = reco.recommend(likesParser.model, moviesParser, 3)
    print("Recommendations: ")
    for m in movies:
        movieObj = moviesParser.movies[m]
        movieName = movieObj['name']
        print "ID:", m, "-", movieName
Example #2
def test_get_missing_requirements():
    recommender = Recommender()
    lst = [0.2, 0.13, 0.04, 0.05, 0.049]
    preprocessed_requirements = ['r1', 'r2', 'r3', 'r4', 'r5']
    requirements = ['requirement1', 'requirement2', 'requirement3', 'requirement4', 'requirement5']
    n.assert_equal(recommender.get_missing_requirements(lst, preprocessed_requirements, requirements),
                [('r3', 'requirement3'), ('r5', 'requirement5')])
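From the expected output, requirements scoring below roughly 0.05 appear to count as missing (0.05 itself does not). A minimal sketch consistent with this test, with the cutoff inferred from the assertion rather than taken from the real Recommender:

MISSING_CUTOFF = 0.05  # hypothetical; inferred from the test above

def get_missing_requirements(scores, preprocessed, originals):
    # Pair each preprocessed requirement with its original text whenever
    # its similarity score falls below the cutoff.
    return [(p, o) for s, p, o in zip(scores, preprocessed, originals)
            if s < MISSING_CUTOFF]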
Example #3
def main():
    '''
    INPUT: None
    OUTPUT: Recommendations sorted in order of relevance
    
    Uses the TextParser and Recommender classes to generate resource recommendations given a user's Quora data
    '''

    read = TextParser()
    read.assemble_df()
    pickle.dump(read.df, open("data/master_df.pkl", "wb"))
    with open('data/quora_data.pkl', 'rb') as quora_user:
        quora = pickle.load(quora_user)
    filtered = read.preprocess_quora(quora)
    clean_quora = read.clean_up(filtered)
    pickle.dump(clean_quora, open("data/clean_quora.pkl", "wb"))

    # Make recommendations
    rec = Recommender()
    rec.vectorize()
    top_ten_ind = rec.recommend()
    recs = read.df.iloc[top_ten_ind]  # .ix was removed in modern pandas
    recs = recs.reset_index()
    recs['img_link'] = list(map(get_image, recs['title']))
    recs.loc[recs['type'] == 'course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
    pickle.dump(recs[0:5], open("data/recs.pkl", "wb"))
    print "These are your recommendations: \n"
    print recs[['title', 'type', 'img_link']]
    return recs[['title', 'type', 'img_link']]
Example #4
def test_get_top_courses():
    recommender = Recommender()
    lst = [0.30, 0.23, 0.34, 0.22]
    number = 2
    courses = ['c1', 'c2', 'c3', 'c4']
    course_names = ['course1', 'course2', 'course3', 'course4']
    n.assert_equal(recommender.get_top_courses(lst, number, courses, course_names),
                [('c3', 'course3', 0.34), ('c1', 'course1', 0.30)])
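The behavior this test pins down is a sort-and-slice; a minimal sketch, assuming the real method does nothing more elaborate:

def get_top_courses(scores, number, courses, course_names):
    # Rank courses by score, descending, and return the top `number`
    # as (course, name, score) triples -- hypothetical implementation.
    ranked = sorted(zip(scores, courses, course_names), reverse=True)
    return [(course, name, score) for score, course, name in ranked[:number]]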
Example #5
def analyze_portfolio():
    uid = get_id_from_session(request.headers.get('session'))
    if not uid:
        return jsonify({"message": "Bad session :("}), 400
    p = Parser(
        list(transactions.find({'_id': uid}, {
            'transaction': 1,
            '_id': 0
        }))[0],
        list(holdings.find({'_id': uid}, {
            'holding': 1,
            '_id': 0
        }))[0], 25)
    pv, direction = predict(p.get_neural_net_attrs(), train())
    r = Recommender(direction.tolist())
    recommendation = r.get_recommendation()
    return jsonify({
        'fitness': pv[0],
        'recommendation': recommendation[0],
        'link': recommendation[1]
    })
Example #6
def get_recommendations(resume_file,
                        requirement_file,
                        ngram_range=(1, 1),
                        use_tagger=False):
    with open(resume_file, 'r') as handle:
        resume = handle.read()
    with open(requirement_file, 'r') as handle:
        requirements = handle.read()
    recommender = Recommender(ngram_range=ngram_range, use_tagger=use_tagger)
    recommender.initialize_attributes(resume, requirements)
    recommender.vectorize_resume()
    recommender.vectorize_requirements()
    missing_requirements = recommender.find_missing_skills()
    print "Requirements:"
    print recommender.requirements
    print "preprocessed_requirements:"
    print recommender.preprocessed_requirements
    print "recommender.missing_requirements"
    print recommender.missing_requirements
    course_recommedations = recommender.recommend()
    return missing_requirements, course_recommedations
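A hypothetical invocation; the file names below are placeholders:

# Assumes resume.txt and requirements.txt exist in the working directory.
missing, courses = get_recommendations('resume.txt', 'requirements.txt',
                                       ngram_range=(1, 2), use_tagger=True)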
Example #7
def recommend():
	user_data = request.form['user_input']  # Flask gives a str already; str(x.encode()) would yield a "b'...'" literal

	# --- Drive to the given URL, scrape and generate recs -- #
	scraped = profile_crawl(user_data)
	quora = scraped['text']
	
	# Read and clean Quora dump recommendations
	read = TextParser()
	read.df = master_df
	filtered = read.preprocess_quora(quora)
	clean_quora = read.clean_up(filtered)
	pickle.dump(clean_quora, open("data/clean_quora.pkl", "wb"))
	rec = Recommender()
	rec.vectorize()
	top_ten_ind = rec.recommend()
	recs = read.df.iloc[top_ten_ind]  # .ix was removed in modern pandas
	recs = recs.reset_index()
	recs['img_link'] = list(map(get_image, recs['title']))
	recs.loc[recs['type'] == 'course', 'img_link'] = 'http://www.michaellenox.com/wp-content/uploads/2014/07/coursera_square_logo.jpg'
	recs = recs[0:20]  # keep the top twenty recommendations
	return render_template('testing.html', data=recs[['title', 'img_link']].values)
Example #8
def article_page(aid):
    session = Sessions.get()
    like = False
    aid = int(aid)
    if session:
        user = session.user
        neo4jdb.add_read_articles(user.id, aid)
        like = neo4jdb.user_liked_article(user.id, aid)
        other_views = neo4jdb.users_also_viewed(aid, user.id)
    else:
        other_views = neo4jdb.users_also_viewed(aid)
    article = Articles.get_by_id(aid)
    similars = Recommender.find_similar(aid)
    other_views = db.get_title_by_aids(other_views)
    return render_template('detail.html',
                           article=article,
                           similar=similars,
                           like=like,
                           others=other_views)
Example #9
def main(likesJson, moviesJson):
    (moviesParser, likesParser, trainLikesMap,
     testLikesMap) = createParsers(likesJson, moviesJson)
    graph = createGraph(moviesParser, trainLikesMap)

    ## Generate recommendations for the users in testLikesMap and count a
    ## hit whenever a recommended movie appears in that user's test likes.
    reco = Recommender(graph)
    recoMap = createRecommendations(likesParser, reco, testLikesMap.keys())

    hitUsers = 0
    for user, recommended in recoMap.items():
        liked = set(testLikesMap[user])
        if liked.intersection(set(recommended)):
            hitUsers += 1

    accuracy = 100.0 * hitUsers / len(recoMap)
    print("Accuracy for the current test run is", accuracy)
Example #10
def recommend():
    resume = request.form.get('resume', None)
    requirements = request.form.get('requirements', None)
    # Initialize recommender
    recommender = Recommender(ngram_range=(1, 1),
                              use_tagger=True,
                              use_stem=False)
    recommender.fit(resume, requirements)
    # Each requirement pair: [0] original requirement, [1] extracted requirement
    missing_requirement_pairs = recommender.find_missing_skills()
    missing_requirements = [item[1] for item in missing_requirement_pairs]
    course_recommendations = recommender.recommend()
    if missing_requirements:
        return render_template('recommend.html',
                               data=zip(missing_requirements,
                                        course_recommendations))
    return render_template('matchall.html')
Example #11
def test_not_empty_requirement1():
    recommender = Recommender()
    req = ''
    n.assert_equal(recommender.not_empty_requirement(req), False)
Example #12
def test_remove_stopwords():
    recommender = Recommender()
    sentence = 'A passion for making sense of lots of unstructured data'
    n.assert_equal(recommender.remove_stopwords(sentence), 'A for making sense of lots of unstructured data')
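Note that ordinary English stopwords ('A', 'for', 'of') survive while 'passion' is dropped, which points to a custom list of job-posting filler words rather than a standard stopword list. A minimal sketch under that assumption:

JOB_FILLER_WORDS = {'passion'}  # hypothetical; the real list is unknown

def remove_stopwords(sentence):
    # Drop domain filler terms, keeping everything else in order.
    return ' '.join(w for w in sentence.split()
                    if w.lower() not in JOB_FILLER_WORDS)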
Example #13
def test_not_empty_requirement5():
    recommender = Recommender()
    req = '    A passion for making sense of lots of unstructured data    '
    n.assert_equal(recommender.not_empty_requirement(req), True)
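Both not_empty_requirement tests are satisfied by a whitespace-aware truthiness check; a one-line sketch:

def not_empty_requirement(req):
    # True when the requirement contains any non-whitespace text.
    return bool(req.strip())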
Example #14
from recommend import NNRecommender
from recommend import Recommender

#  nn = NNRecommender(5)

#  outputs = nn.find_similar(5)
outputs = Recommender.find_similar(5)

print(outputs)
Example #15
def lemma_search(query_str):
    ids = Recommender.lemma_search(query_str)
    results = db.get_title_by_aids(ids)
    return results
Example #16
def ensembles_performance(k, d, topk=5):
    '''
    k: fold index
    d: latent dimensionality of the PMF model
    topk: size of the recommendation list
    '''

    print('fold %d, dim %d, top %d list' % (k, d, topk))

    with open(MODELFOLDER + '/u-100k-fold-d%d-%d.out' % (d, k), 'rb') as f:
        pmf_list = load(f)
    # fold_load returns train/valid/test splits with their user and item indices
    train, trainU, trainI, valid, validU, validI, test, testU, testI = \
        fold_load('ml-100k', k)

    with open('ml-100k/fold%d-hidden.pickle' % k, 'rb') as f:
        hidden_v = load(f)
    with open('ml-100k/test-hidden.pickle', 'rb') as f:
        hidden_t = load(f)

    print('loaded pmf_list')

    RS_list = [Recommender(item_MF=pmf.items) for pmf in pmf_list]
    print('RS_list created')
    n_items = RS_list[0].n_items

    with open(RESULTFOLDER + 'u-100k-fold-%d-d%d-top%d-results.out' % (d, k, topk), 'rb') as f:
        result = load(f)

    result = result[0:len(RS_list)]
    # ensembles
    # If the order is changed, adjust the ENSMEBLE_ORDER constant accordingly
    E1 = Majority(RS_list, threshold=3)
    precisions = [line[2] for line in result]
    E1f = FilteredMajority(RS_list, performances=precisions, threshold=3)
    E2 = WeightedVote(RS_list, weights=precisions, threshold=3)
    best_RS = np.argmax(precisions)
    E3 = RS_list[best_RS]
    evalu_ensemble = [Evaluation(RS=E1, topk=topk),
                      Evaluation(RS=E1f, topk=topk),
                      Evaluation(RS=E2, topk=topk),
                      Evaluation(RS=E3, topk=topk)]
    print('ensembles created')

    for e_id, evaluator in enumerate(evalu_ensemble):
        P, R = eval_users(test, hidden_t, evaluator, n_items)
        P = np.mean(P)
        R = np.mean(R)

        result.append([d, P, R])
        print('!!!concluded E', e_id, 'PR', P, R)

    result[-1].insert(1, pmf_list[best_RS].lambdaa)

    print('saving results')

    with open(RESULTFOLDER + 'u-100k-fold-%d-d%d-top%d-results.out' % (d, k, topk), 'wb') as f:
        dump(result, f)
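A hypothetical invocation, assuming the pickled PMF models and fold files referenced above are already on disk; the specific k and d values are placeholders:

# Evaluate the ensembles on fold 0 with 10-dimensional PMF models and top-5 lists.
ensembles_performance(k=0, d=10, topk=5)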