Ejemplo n.º 1
0
def helper(passenger):
    """Build a reverse-chronological itinerary card stack for *passenger*.

    Walks backwards in time from the departure gate through recommended
    checkpoints to the arrival gate, then returns the cards oldest-first.

    Args:
        passenger: object exposing ``get_arrival_gate()`` and
            ``get_dest_gate()``, each returning a dict with 'time',
            'gate', 'flight' and 'to'/'from' entries.

    Returns:
        list[dict]: cards with 'time' (HH:MM), 'title' and 'subtitle',
        ordered from arrival to departure.
    """
    arrival = passenger.get_arrival_gate()
    dest = passenger.get_dest_gate()
    r = Recommender(demo=True)
    checkpoints = r.recommendations()

    curr_time = arrival['time']
    stack = [{
        'time': curr_time.strftime("%H:%M"),
        'title': 'Departure Gate {}'.format(dest['gate']),
        'subtitle': '{} to {}'.format(dest['flight'], dest['to'])
    }]

    max_char = 65  # KYLE: change to vary maximum characters allowed
    for chk in checkpoints:
        # Fake a plausible gap between stops by stepping back 15-60 min.
        curr_time = curr_time - timedelta(minutes=randint(15, 60))
        desc = chk['description']
        stack.append({
            'time': curr_time.strftime("%H:%M"),
            'title': chk['name'],
            # Fix: only add an ellipsis when the description was actually cut.
            'subtitle': desc[:max_char] + '...' if len(desc) > max_char else desc,
        })

    stack.append({
        'time':
        (curr_time - timedelta(minutes=randint(30, 80))).strftime("%H:%M"),
        'title':
        'Arrival Gate {}'.format(arrival['gate']),
        'subtitle':
        '{} from {}'.format(arrival['flight'], arrival['from'])
    })
    # Cards were built departure-first; callers expect arrival-first.
    return list(reversed(stack))
Ejemplo n.º 2
0
def unlike(request, fb_id, content_id):
    """Record *content_id* as visited for user *fb_id* and persist it.

    Returns the updated visited-content mapping as a JSON HTTP response.

    NOTE(review): despite the name 'unlike', this *appends* the content
    id to the user's list; confirm against callers whether removal was
    intended.
    """
    _recommender = Recommender()
    visited_content = _recommender.get_user_visited_content()
    # Removed stray Python-2 debug `print` statement.
    visited_content[fb_id].append(content_id)
    _recommender.save_user_visited_content(visited_content)
    return HttpResponse(status=200,
                        content=json.dumps(visited_content),
                        content_type="application/json")
def get_recommendations(builds, clean=cleaned, svd=None, encoder=False):
    """Fit a Recommender on *clean* and recommend items similar to *builds*.

    Args:
        builds: index labels of the rows to base recommendations on.
        clean: source DataFrame (defaults to the module-level ``cleaned``).
        svd: if truthy, the number of TruncatedSVD components (capped at
            40) used as the preprocessing step instead of StandardScaler.
        encoder: reserved for a currently-disabled autoencoder path.

    Returns:
        The recommender's recommendations for ``clean.loc[builds]``.
    """
    clean = clean.drop_duplicates()

    preprocessors = [StandardScaler()]
    if svd:  # `None` and 0 are both falsy; the old `is not None and svd` was redundant
        # Cap the component count; too many defeats the dimensionality reduction.
        n_components = min(40, int(svd))
        preprocessors = [TruncatedSVD(n_components)]
        clean[reg_cols] = scaler_reg.transform(clean[reg_cols])

    # if encoder is not None and encoder:
    #     #drop
    #     #scale

    #     predict = autoencoder_model.predict(clean.loc[builds] )

    #     #unscale
    #     #rename and combine columns

    recommender = Recommender(
        drop_columns=[
            'Date Published', 'price_build', 'number_ratings', 'avg_rating',
            'storage_price'
        ],
        preprocessors=preprocessors,
        # feature_weights = {'Core Clock' : 10},
    )
    recommender.fit(clean)
    return recommender.recommend(clean.loc[builds])
def main():
    """CLI entry point: parse flags and print event recommendations."""
    # Fix: the original created a parser with a description and then
    # immediately discarded it by building a second bare ArgumentParser.
    parser = argparse.ArgumentParser(description='Nyc Event Recommender')
    parser.add_argument('-t', '--today',
                        action='store_true',
                        help='Show today\'s events')
    parser.add_argument('-a', '--all',
                        action='store_true',
                        help='Show events all week')
    parser.add_argument('-j', '--json',
                        action='store_true',
                        help='Show events in json format')
    args = parser.parse_args()

    if args.today or args.all:
        # os.system('cls' if os.name == 'nt' else 'clear')
        # Distinct local name so the `json` module is not shadowed.
        as_json = args.json
        recommender = Recommender(args.today, as_json)
        events = recommender.get_recommendation()
        print(events)
    else:
        parser.print_help()
Ejemplo n.º 5
0
 def setUp(self):
     """Wire a Mongo-backed data source and a Recommender client."""
     super(TestRecommender, self).setUp()
     source = MongoDataSource(MONGO_URL, FEATURE_FILE)
     source.save_bounds()
     self.data_source = source
     self.client = Recommender(SERVER_URL, source)
Ejemplo n.º 6
0
def authentication(ckey, csecret, atoken, atokensecret):
 """Authenticate with Twitter and render a page with a generated tweet.

 Builds a tweepy API client from the supplied OAuth credentials, collects
 the user's own tweets plus the timelines of every followed account, and
 renders the tweet produced by ``Recommender.generate``.
 """
 auth = tweepy.OAuthHandler(ckey, csecret)
 auth.set_access_token(atoken, atokensecret)

 api = tweepy.API(auth)
 my_tweets = api.user_timeline()
 following = api.friends()

 # Map screen name -> that friend's recent tweets.  (Fix: dropped the
 # unused `api.get_user(...)` call, which cost one API request per friend,
 # and the unused `my_first_tweet` local.)
 dict_of_followed_tweets = {}
 for friend in following:
   dict_of_followed_tweets[friend.screen_name] = friend.timeline()

 recommenderObj = Recommender()
 generatedTweet = recommenderObj.generate(my_tweets, 1, following, 2, dict_of_followed_tweets)

 return template('Result: {{generatedTweetHere}}', generatedTweetHere=generatedTweet)
    def __init__(self):
        """Poll the user collection forever, refreshing recommendations.

        Repeatedly finds users whose beers were rated since the last
        pass, writes fresh recommendations for each, then sleeps with a
        simple back-off multiplier when nothing changed.  (Python 2
        code: uses `print` statements.)
        """
        db = utils.connect_db('Two_Pick_Too_Drunk')
        collection = db['user']
        last_update_start = datetime.today()
        reviews = 'reviewer'
        clusters = 'reviewer_cluster'
        # updated = (did_update_this_pass, back-off multiplier)
        updated = (False,1)
        recommenderer = Recommender()
        while 1:
            users  = collection.find({"last_beer_update":{"$gte":last_update_start}})
            start_time = time.time()
            for user in users:
                print user['name']
                (results,result_set) = recommenderer.recommender(user["Beers_Rated"], reviews, clusters, db)
                collection.update({"_id": user["_id"]}, {"$set": {"Recommendations": results}})
                updated = (True,1)
            end_time = time.time()
            if updated[0]:
                print 'done with updation after %.3f seconds'%(end_time-start_time)

            last_update_start = datetime.now()
            if updated[0]:
                # NOTE(review): sleeps multiplier*5 seconds but reports
                # multiplier*10 — message and sleep disagree; confirm intent.
                time.sleep(updated[1]*5);
                print 'Slept for '+str(updated[1]*10) + ' seconds'
                updated = (False,updated[1])
            else:
                if updated[1] < 30:
                    # Nothing changed: grow the back-off (capped at 30).
                    updated = (False,updated[1]+1)
                    time.sleep(updated[1]*10);
                    print 'Slept for '+str(updated[1]*10) + ' seconds'
                else:
                    time.sleep(updated[1]*10);
                    print 'Slept for '+str(updated[1]*10) + ' seconds'
Ejemplo n.º 8
0
 def __init__(self):
     """Load configuration, build the recommender, and make sure the
     on-disk requests directory exists."""
     logging.info("Setting up AppRecommender...")
     self.cfg = Config()
     self.rec = Recommender(self.cfg)
     requests_dir = "/var/www/AppRecommender/src/web/requests/"
     self.requests_dir = requests_dir
     if not os.path.exists(requests_dir):
         os.makedirs(requests_dir)
Ejemplo n.º 9
0
def recommend():
    """Recommendation route.

    Reads a 1-based ``user_id`` query parameter and returns the
    recommender's results for that user.
    """
    raw_id = request.args.get('user_id')
    user_id = int(raw_id) - 1  # request ids are 1-based; the model is 0-based
    return Recommender().recommend([user_id])
Ejemplo n.º 10
0
def results():
    """Render the results page for the session's personal ratings.

    Redirects to the 404 page when the ratings are missing/invalid or
    when anything goes wrong while computing recommendations.
    """
    # if personal_rating list is empty, reroute to 404 page
    if check_personal_ratings(session["personal_ratings"]):
        return redirect(url_for('to404'))

    try:
        recommender = Recommender(session["personal_ratings"])
        results = recommender.get_result()

        # Split off the headline result; the rest stay in `results`.
        first_result = results.pop(0)
        first_result_year = first_result["release_date"][:4]

        return render_template('results.html',
                               title="Results",
                               first_result=first_result,
                               first_result_year=first_result_year,
                               results=results)
    except Exception as e:
        # Best-effort: any failure falls back to the 404 page.
        print(e)
        return redirect(url_for('to404'))
Ejemplo n.º 11
0
def main():
    """Interactively filter restaurants by location, rating and category,
    then show a random pick (with optional recommendation) or all matches."""
    # test
    all_data_df, time_data_df, category_data_df = parser.get_business_data()

    print("Enter a state, no abbreviations")  # TODO: fix this prompt
    state = input()
    print("Enter a city")
    city = input()
    print("Enter a lower and upper bound for a desired rating of the restaurant, in this format: lower_bound,upper_bound")
    bounds = input().split(",")
    print("Enter food categories, split with commas, no space.")
    categories = input().split(",")

    all_data_df = select_location(all_data_df, state, city)
    all_data_df = select_range(all_data_df, float(bounds[0]), float(bounds[1]))
    place_ids = get_place_ids(all_data_df, category_data_df, categories)

    print("Enter 0 for a random restaurant, and 1 for a list of restaurants")
    choice = input()
    if int(choice) != 0:
        show_all_res(all_data_df, list(place_ids))
        return

    select_ran_res(all_data_df, list(place_ids), time_data_df)
    print("See recommendation? <Y/N>")
    if input() == "Y":
        Recommender(all_data_df, list(place_ids)).print_random()
Ejemplo n.º 12
0
def recommender():
    '''Creates user profile and recommends job'''
    skill1 = request.form.get('skill1') != None
    skill2 = request.form.get('skill2') != None
    skill3 = request.form.get('skill3') != None
    skill4 = request.form.get('skill4') != None
    skill5 = request.form.get('skill5') != None
    skill6 = request.form.get('skill6') != None
    skill7 = request.form.get('skill7') != None
    skill8 = request.form.get('skill8') != None
    skill9 = request.form.get('skill9') != None
    skill10 = request.form.get('skill10') != None
    skill11 = request.form.get('skill11') != None
    skill12 = request.form.get('skill12') != None
    skill13 = request.form.get('skill13') != None
    skill14 = request.form.get('skill14') != None
    skill15 = request.form.get('skill15') != None
    skill16 = request.form.get('skill16') != None
    skill17 = request.form.get('skill17') != None
    user_vector = [skill1, skill2, skill3, skill4, skill5, 
                skill6, skill7, skill8, skill9, skill10, skill11, 
                skill12, skill13, skill14, skill15, skill16, skill17]
    r = Recommender(user_vector)
    recs = r.recommend()
    descrip = r.rec_descrip
    str_recs = ' '.join(recs)
    return render_template('base.html') + f''' 
Ejemplo n.º 13
0
def load_models_and_businesses(spark_context):
    """Populate the module-level recommender and its MF model.

    NOTE(review): ``richer_biz_info`` is declared global here but never
    assigned in this function — presumably set elsewhere; confirm.
    """
    global recommender
    global richer_biz_info
    global model

    recommender = Recommender(spark_context, __get_model_path())
    model = recommender.load_mf_model()
Ejemplo n.º 14
0
class TestRecommender(unittest.TestCase):
    """Smoke test: train the recommender and print each user's top-3 picks.

    Python 2 code (uses a `print` statement).
    """

    def setUp(self):
        self.recommender = Recommender('../data/sample.db')  # sample database

    def test_recommend(self):
        self.recommender.train()
        for user in self.recommender.users:
            print 'user = {0}, {1}'.format(user, self.recommender.recommend(user, 3))
Ejemplo n.º 15
0
def _print_genre_counts(counts):
    """Print 'genre: count' lines, genres in reverse alphabetical order."""
    for k, v in sorted(counts.items(), key=lambda d: d[0], reverse=True):
        print("{}: {}".format(k, v))


def analyzeBestFitUser():
    """Take a deeper look at the best-predicted user in the test set.

    Recommends 50 movies for that user, tallies the genres of the
    recommendations against the genres of the user's 50 highest-rated
    movies, and prints both distributions plus a CSV-style comparison
    (genre, recommended count, liked count).
    """
    movies, movieTagMat, userRankMat, testCases = loadData()
    user2userPredictor = user2user(userRankMat, topK=105)
    item2itemPredictor = item2item(userRankMat, movieTagMat, topK=20)

    # The SSE search below was used once to locate the best-fit user
    # (uid 480); kept commented for reproducibility.
    # _, results = predictTest(user2userPredictor, testCases, "")
    # _, results = predictTest(item2itemPredictor, testCases, "")
    # userAvgSSE = defaultdict(float)
    # for res in results:
    #     userAvgSSE[res[0]] += (res[2] - res[1]) ** 2
    # sse = list(userAvgSSE.items())
    # sse.sort(key=lambda x: x[1])
    # uid, minSSE = sse[0]
    # print("(uid, smallest SSE): ({}, {})".format(uid, minSSE))
    uid = 480

    # Compare different recommenders by swapping the predictor here.
    # recommender = Recommender(movieTagMat, userRankMat, movies, user2userPredictor)
    recommender = Recommender(movieTagMat, userRankMat, movies,
                              item2itemPredictor)
    recommendMovies = recommender.doRecommend(uid, 50)["recommended_movies"]
    print("recommended movies:")
    recommendedCategory = defaultdict(int)
    # Fix: only the values are needed (the original iterated .items()
    # and discarded the key).
    for r in recommendMovies.values():
        for genre in movies[r[0]].genres:
            recommendedCategory[genre] += 1
    _print_genre_counts(recommendedCategory)
    print("")

    # Compare against the user's own 50 highest-rated movies.
    print("His or her favorite movies:")
    userRank = userRankMat[uid]
    idx = np.argsort(-userRank)[:50]
    userLikeCategory = defaultdict(int)
    for i in idx:
        for genre in movies[i].genres:
            userLikeCategory[genre] += 1
    _print_genre_counts(userLikeCategory)

    print("")
    # .get avoids inserting into the defaultdict on a miss.
    for k, v in recommendedCategory.items():
        print("{},{},{}".format(k, v, userLikeCategory.get(k, 0)))
    for k, v in userLikeCategory.items():
        if k not in recommendedCategory:
            print("{},0,{}".format(k, v))
class Test(unittest.TestCase):
    """ Test class for Recommender """

    def setUp(self):
        """Seed the recommender with fixed movies, ratings, genres, users."""
        unittest.TestCase.setUp(self)
        self.recommender = Recommender()
        test_movies = [{"movie_id" : 1, "title": "Movie1", "release_date" : "01-Jan-2011", "imdb_url":"https://someurl.com", "genres":"Drama Thriller", "ratings":0},
                       {"movie_id" : 20, "title": "Movie2", "release_date" : "01-Jan-2010", "imdb_url":"https://someurl.com", "genres":"Mystery", "ratings":0},
                       {"movie_id" : 35, "title": "Movie3", "release_date" : "01-Jan-2010", "imdb_url":"https://someurl.com", "genres":"Drama", "ratings":0},
                       {"movie_id" : 3, "title": "Movie4", "release_date" : "01-Jan-2012", "imdb_url":"https://someurl.com", "genres":"Thriller Crime", "ratings":0},
                       {"movie_id" : 4, "title": "Movie5", "release_date" : "01-Jan-2012", "imdb_url":"https://someurl.com", "genres":"Crime", "ratings":0},
                       {"movie_id" : 10, "title": "Movie6", "release_date" : "01-Jan-2012", "imdb_url":"https://someurl.com", "genres":"Western", "ratings":0},
                       {"movie_id" : 11, "title": "Movie7", "release_date" : "01-Jan-2012", "imdb_url":"https://someurl.com", "genres":"War Crime", "ratings":0}]
        movies_objects = []
        for movies in test_movies:
            movies_objects.append(Movie(movies["movie_id"], movies["title"], movies["release_date"], movies["imdb_url"], movies["genres"], 0))

        test_ratings = [{'movie_id': 1, 'user_id': 1, 'rating': 10, 'timestamp' : datetime.datetime.fromtimestamp(891350008)},
                        {'movie_id': 20, 'user_id': 2, 'rating': 9, 'timestamp' : datetime.datetime.fromtimestamp(891350010)},
                        {'movie_id': 35, 'user_id': 3, 'rating': 7, 'timestamp' : datetime.datetime.fromtimestamp(891350012)},
                        {'movie_id': 3, 'user_id': 4, 'rating': 4, 'timestamp' : datetime.datetime.fromtimestamp(891350014)},
                        {'movie_id': 4, 'user_id': 5, 'rating': 5, 'timestamp' : datetime.datetime.fromtimestamp(891350016)},
                        {'movie_id': 10, 'user_id': 6, 'rating': 5, 'timestamp' : datetime.datetime.fromtimestamp(891350018)},
                        {'movie_id': 11, 'user_id': 7, 'rating': 6, 'timestamp' : datetime.datetime.fromtimestamp(891350020)},
                        {'movie_id': 1, 'user_id': 7, 'rating': 10, 'timestamp' : datetime.datetime.fromtimestamp(891350008)},
                        {'movie_id': 20, 'user_id': 6, 'rating': 9, 'timestamp' : datetime.datetime.fromtimestamp(891350010)},
                        {'movie_id': 35, 'user_id': 5, 'rating': 7, 'timestamp' : datetime.datetime.fromtimestamp(891350012)},
                        {'movie_id': 3, 'user_id': 4, 'rating': 4, 'timestamp' : datetime.datetime.fromtimestamp(891350014)},
                        {'movie_id': 4, 'user_id': 3, 'rating': 5, 'timestamp' : datetime.datetime.fromtimestamp(891350016)},
                        {'movie_id': 10, 'user_id': 2, 'rating': 5, 'timestamp' : datetime.datetime.fromtimestamp(891350018)},
                        {'movie_id': 11, 'user_id': 1, 'rating': 6, 'timestamp' : datetime.datetime.fromtimestamp(891350020)}]

        all_genres = {"unknown":0, "Action":1, "Adventure":2, "Animation":3, "Children's":4, "Comedy":5, "Crime":6, "Documentary":7, "Drama":8, "Fantasy":9, "Film-Noir":10,
                          "Horror":11, "Musical":12, "Mystery":13, "Romance":14, "Sci-Fi":15, "Thriller":16, "War":17, "Western":18}
        users_ = ["1 24 M technician 85711","2 53 F other 94043","3 23 M writer 32067","4 24 M technician 43537","5 33 F other 15213",
                  "6 42 M executive 98101","7 57 M administrator 91344","8 36 M administrator 05201","9 29 M student 01002","10 53 M lawyer 90703"]
        all_users = []
        for each_user in users_:
            # BUG FIX: the split() result was discarded, so User() was fed
            # single characters of the raw string (e.g. '1', ' ', '2', '4', ' ').
            fields = each_user.split(" ")
            all_users.append(User(fields[0], fields[1], fields[2], fields[3], fields[4]))

        self.recommender.all_ratings = test_ratings
        self.recommender.all_movies = movies_objects
        self.recommender.all_users = all_users
        self.recommender.all_genre = all_genres

    def test_recommend_for(self):
        """ test case for recommend_for """
        # Non-integer and unknown user ids both yield None.
        movie = self.recommender.recommend_for("h")
        self.assertIsNone(movie)

        movie = self.recommender.recommend_for(100)
        self.assertIsNone(movie)

        movie = self.recommender.recommend_for(1)
        self.assertEqual(movie[0].get_title(), "Movie2")
Ejemplo n.º 17
0
 def __init__(self):
     """Greet the user and construct the search and recommendation engines."""
     print("Hello, welcome to use What2watch movie search and recommendation system.\n")
     print("Next, please select you want to use the search engine or the recommender.\n")
     self.recommender = Recommender()
     self.searcher = Searcher()
Ejemplo n.º 18
0
def run(source, target, num_topics = 100, passes = 20, lang = 'en', distance_measure = euclidean, percentage = 0.05):
	"""
	Main entry point for this package. Contains and executes the whole data pipeline. 

	Arguments:
	source -- The path string to the source file containing all reviews
	target -- The path string to the target directory where the neighbors for all users will be saved

	Keyword arguments:
	num_topics -- The number of topics LDA is supposed to discover (default 100)
	passes -- The number of iterations for the statistical inference algorithm (default 20)
	lang -- The language the reviews shall be sorted by (default 'en')
	distance_measure -- A python function that measures the distance between two vectors in a num_topics-dimensional vector space. 
				Must take two numpy arrays and return a float. (default euclidean)
	percentage -- The cutoff for being a close neighbor, i.e. two users are close if their distance is 
			within the closest percentage percent of all distances (default 0.05) 
	"""
	# One JSON-encoded review per line in the source file.
	with open(source) as f:
		all_reviews = []
		for line in f:
			all_reviews.append(json.loads(line))

	reviews = filter_by_language(all_reviews, lang)

	# Tokenize, then build the dictionary and document-term matrix for LDA.
	rt = ReviewTokenizer(reviews)
	rt.tokenize()

	db = DictionaryBuilder(rt.tokenized_docs)
	db.build()

	dtmb = DTMBuilder(db.dictionary, db.srcTexts)
	dtmb.build()

	ldaw = LDAWrapper(dtmb.dtm, db.dictionary)
	ldaw.run(num_topics = num_topics, passes = passes)

	# Per-user topic posteriors, averaged into one mean vector per user.
	modelwrapper = LDAModelWrapper(ldaw.ldamodel, db.dictionary, sortByUsers(rt.tokenized_docs))
	posteriors = modelwrapper.get_all_posteriors()

	means = {}
	# NOTE(review): iteritems/iterkeys below are Python-2-only.
	for key, value in posteriors.iteritems():
		means[key] = mean(value).tolist()

	x = Recommender(means)
	y = x.calc_distances(distance_measure)

	threshhold = fivePercent(y, percentage)

	# Write each user's close neighbors to <target>/<user>.json
	# (or ./<user>.json when target is empty).
	for user in means.iterkeys():
		z = x.calc_neighbors(user, distance_measure, threshhold = threshhold)
		if len(target) > 0:
			fileName = target + '/' + user + '.json'
		else:
			fileName = user + '.json'
		with open(fileName, 'w') as g:
			json.dump(z, g) 
def home():
	"""Landing page: on a valid form submit, run the recommender and
	render the results page; otherwise (re)render the input form."""
	form = UserInput()
	if form.validate_on_submit():
		flash('Data taken successfully','success')
		data = request.form
		r = Recommender()
		# Stash the query globally so other views can reuse it.
		global search
		results,search = r.recommend(data)
		return render_template('results.html',results=results)
	return render_template('home.html',form=form)
Ejemplo n.º 20
0
 def __init__(self):
     """Load configuration, build the recommender, ensure the submissions
     directory exists, and list the survey's strategy codes."""
     logging.info("Setting up survey...")
     self.cfg = Config()
     self.rec = Recommender(self.cfg)
     submissions_dir = "/var/www/AppRecommender/src/web/submissions/"
     self.submissions_dir = submissions_dir
     if not os.path.exists(submissions_dir):
         os.makedirs(submissions_dir)
     self.strategies = ["cbh", "cbh_eset", "knn", "knn_eset", "knn_plus",
                        "knnco"]
Ejemplo n.º 21
0
def set_keywords(request):
    """Push the keyword vocabulary and per-program keyword vectors into
    the recommender, then acknowledge with a plain "OK" response.
    """
    _recommender = Recommender()
    _recommender.set_keywords(Keyword.objects.keyword_array())

    for program in Program.objects.all():
        # values_list yields (text, relevancy) pairs; dict() replaces the
        # Python-2-only `map(lambda (k, r): ...)` side-effect construct.
        keyword_map = dict(program.keyword_set.values_list('text', 'relevancy'))
        _recommender.add_content_vector(program.key, keyword_map)

    return HttpResponse(content="OK")
    def init_recommender_system(self, rating_column, descriptor,
                                five_feature_columns, two_group_columns):
        """Build and cache a Recommender from this object's utility matrix.

        All four arguments are passed straight through to ``Recommender``;
        the utility matrix and in-stock reviews come from
        ``self.get_utility_matrix()`` (its ``users``/``items`` outputs are
        not used here).

        Returns:
            The newly created Recommender, also stored on
            ``self.recommender_system``.
        """
        utility_matrix, in_stock_reviews, users, items = self.get_utility_matrix(
        )
        self.recommender_system = Recommender(utility_matrix, in_stock_reviews,
                                              rating_column, descriptor,
                                              five_feature_columns,
                                              two_group_columns)

        return self.recommender_system
Ejemplo n.º 23
0
    def __init__(self, config=None):
        """Load the word2vec-backed recommender model.

        Args:
            config: optional configuration mapping from cortex.yaml; only
                needed by the disabled S3 download path below.  (Fix: was a
                mutable default ``{}``; also dropped the redundant ``pass``.)
        """
        config = config or {}

        # When using s3 bucket to download the model:
        # s3 = boto3.client("s3")
        # s3.download_file(config["bucket"], config["key"], "w2v_limitingfactor_v3.51.model")

        self.model = Recommender('models/w2v_limitingfactor_v3.51.model')
Ejemplo n.º 24
0
class RecommenderTests(unittest2.TestCase):
    """Exercises Recommender strategy switching and recommendation output."""

    @classmethod
    def setUpClass(cls):
        """Build one shared Recommender over the test-data corpus."""
        # Fix: a classmethod receives the class — name it `cls`, not `self`.
        cfg = Config()
        cfg.popcon_index = "test_data/.sample_pxi"
        cfg.popcon_dir = "test_data/popcon_dir"
        cfg.clusters_dir = "test_data/clusters_dir"
        cls.rec = Recommender(cfg)

    def test_set_strategy(self):
        """Each strategy code maps to the expected class/content pair."""
        self.rec.set_strategy("cb")
        self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
        self.assertEqual(self.rec.strategy.content, "full")
        self.rec.set_strategy("cbt")
        self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
        self.assertEqual(self.rec.strategy.content, "tag")
        self.rec.set_strategy("cbd")
        self.assertIsInstance(self.rec.strategy, ContentBasedStrategy)
        self.assertEqual(self.rec.strategy.content, "desc")
        self.rec.set_strategy("col")
        self.assertIsInstance(self.rec.strategy, CollaborativeStrategy)

    def test_get_recommendation(self):
        """A known package profile yields a non-empty RecommendationResult."""
        user = User({"inkscape": 1, "gimp": 1, "eog": 1})
        result = self.rec.get_recommendation(user)
        self.assertIsInstance(result, RecommendationResult)
        self.assertGreater(len(result.item_score), 0)
Ejemplo n.º 25
0
def process(headline, description):
    """Predict the article's category from headline+description and
    return category-based recommendations as ``(json, 200)``.
    """
    query = headline + ' ' + description
    prediction = predictor.predictor(query)
    # Fix: a full sort was used just to take the top item; max() is O(n).
    # (Also removed the unused `tweets_filtered` local.)
    index = max(prediction, key=lambda p: p[1])[0]
    category = categories[index]
    r = Recommender(query, dataset[category])
    results = r.return_results()
    return jsonify(results), 200
Ejemplo n.º 26
0
 def __init__(self, path, model_c, model_cn, model_s, model_sn):
     """Create one Recommender per embedding-model variant, in the same
     order as the human-readable names in ``self.models``."""
     self.recs = [Recommender(path, variant)
                  for variant in (model_c, model_cn, model_s, model_sn)]
     self.models = ["cbow", "cbow-negative", "skipgram", "skipgram-negative"]
     self.test_scenarios = []
Ejemplo n.º 27
0
def run_strategy(cfg, sample_file):
    """Evaluate the configured strategy over a population sample; plot ROC.

    For every popcon user id listed in *sample_file* (one per line),
    repeatedly hides 90% of the user's package profile, asks the
    recommender to rank the whole repository from the remaining 10%, and
    accumulates how well the hidden packages are recovered.  Plots and a
    stats file are written under results/roc-sample/<sample name>/.

    Relies on a module-level ``iterations`` count.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    population_sample = []
    sample_str = sample_file.split('/')[-1]
    # Each user id maps to a popcon submission file sharded by its
    # first two characters.
    with open(sample_file, 'r') as f:
        for line in f.readlines():
            user_id = line.strip('\n')
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    sample_dir = ("results/roc-sample/%s" % sample_str)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])

    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            # Hide 90% of the profile; recommend from what remains.
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # NOTE(review): random.choice(dict.keys()) is Python-2-only;
                # Python 3 needs list(item_score).
                key = random.choice(item_score.keys())
                sample[key] = item_score.pop(key)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user, repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)

    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n")  # noqa
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size, numpy.mean(results.fpr[size]),
                     numpy.std(results.fpr[size]),
                     numpy.mean(results.recall[size]),
                     numpy.std(results.recall[size]),
                     numpy.mean(results.coverage(size))))
Ejemplo n.º 28
0
def run_strategy(cfg, sample_file):
    """ROC evaluation of the configured strategy over a population sample.

    For each popcon user id in *sample_file*, repeatedly hides 90% of the
    user's package profile, recommends from the remaining 10% against the
    full repository, and records how well the hidden packages are
    recovered.  Output goes to results/roc-sample/<sample name>/.

    Relies on a module-level ``iterations`` count.  (This function appears
    elsewhere in this collection in a nearly identical form.)
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    population_sample = []
    sample_str = sample_file.split('/')[-1]
    # User ids map to popcon submission files sharded by first two chars.
    with open(sample_file, 'r') as f:
        for line in f.readlines():
            user_id = line.strip('\n')
            population_sample.append(
                os.path.join(cfg.popcon_dir, user_id[:2], user_id))
    sample_dir = ("results/roc-sample/%s" % sample_str)
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)
    log_file = os.path.join(sample_dir, label["values"])

    # n iterations per population user
    for submission_file in population_sample:
        user = PopconSystem(submission_file)
        user.filter_pkg_profile(cfg.pkgs_filter)
        user.maximal_pkg_profile()
        for n in range(iterations):
            # Fill sample profile
            profile_len = len(user.pkg_profile)
            item_score = {}
            for pkg in user.pkg_profile:
                item_score[pkg] = user.item_score[pkg]
            sample = {}
            # Hide 90% of the profile; recommend from what remains.
            sample_size = int(profile_len * 0.9)
            for i in range(sample_size):
                # NOTE(review): random.choice(dict.keys()) is Python-2-only.
                key = random.choice(item_score.keys())
                sample[key] = item_score.pop(key)
            iteration_user = User(item_score)
            recommendation = rec.get_recommendation(iteration_user, repo_size)
            if hasattr(recommendation, "ranking"):
                results.add_result(recommendation.ranking, sample)

    plot_roc(results, log_file)
    plot_roc(results, log_file, 1)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" % (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write(
            "# threshold\tmean_fpr\tdev_fpr\t\tmean_tpr\tdev_tpr\t\tcoverage\n"
        )  # noqa
        for size in results.thresholds:
            f.write(
                "%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                (size, numpy.mean(results.fpr[size]),
                 numpy.std(results.fpr[size]), numpy.mean(
                     results.recall[size]), numpy.std(results.recall[size]),
                 numpy.mean(results.coverage(size))))
def get_recom_from_input(username, input_name, data):
    '''
    generate recommendations using input from the request form
        - INPUT:
            username str
            input_name  str
            data: input from the request form
        - OUTPUT:  results_dict 
           dict(username = username,\
                input_data = data, input_name = input_name, sorted_topics = sorted_topics_for_inputs, \
                idx = range(df_recom.shape[0]), \
                df_recom = df_recom, relevant_all=relevant_all)

        - pre-requisit:

    '''
    model_name = 'v2_2'
    fname = input_name

    relevant_all = None
    # hard code process used in v2_2 model
    func_tokenizer = TfidfVectorizer(stop_words='english').build_tokenizer()
    func_stemmer = PorterStemmer()

    # load model
    t0 = time.time()
    recommender = Recommender(model_name, func_tokenizer, func_stemmer)

    # read in input text
    cleaned_slack = pre_clean_text(func_tokenizer, data)

    # Topic weights (W) and features for the cleaned input text.
    W, tokenized_slacks2, test_X2, top_features_list = recommender.process_input(
        cleaned_slack)
    sorted_topics = recommender.topic_model.sorted_topics_for_articles(W)

    print 'input name: %s' % input_name
    # recommendations
    print '--------------- recommendations --------------'
    df_recom = recommender.calculate_recommendations(W, test_X2, fname)
    print sorted_topics
    t1 = time.time()
    print "finished in  %4.4f min %s " % ((t1 - t0) / 60, 'finished all processing\n')

    df_recom['topics'] = df_recom['topics'].apply(format_related_topics)

    results_dict = dict(username=username,
                        input_data=data, input_name=input_name, sorted_topics=sorted_topics,
                        idx=range(df_recom.shape[0]),
                        df_recom=df_recom, relevant_all=relevant_all)

    # NOTE(review): text-mode pickle write is Python-2 style; Python 3
    # would need 'wb'.  Python 2 print statements above, too.
    with open(dummy_result_pkl, 'w') as out_fh:
        pickle.dump(results_dict, out_fh)
    return results_dict
Ejemplo n.º 30
0
def authentication(ckey, csecret, atoken, atokensecret, topic):
 """Authenticate with Twitter, search *topic*, and render a summary page.

 Pulls up to 1000 tweets about *topic*, generates a tweet from the user's
 timeline and followers, and reports the three most-mentioned accounts
 plus the three users who tweeted most about the topic.
 """
 consumer_key = ckey
 consumer_secret = csecret
 access_token = atoken
 access_token_secret = atokensecret

 auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
 auth.set_access_token(access_token, access_token_secret)

 api = tweepy.API(auth)
 # Raw JSON payloads for up to 1000 tweets matching the topic.
 results = [ status._json for status in tweepy.Cursor(api.search,
                           q=topic,
                           count=1000).items(1000)]

 my_tweets = api.user_timeline()
 my_first_tweet = my_tweets[0].text
 following = api.followers()

 recommenderObj = Recommender()
 generatedTweet = recommenderObj.generate(my_tweets, 1, following, 2)
 # accounts_recommend[0]: (mention, count) pairs; [1]: (user, count) pairs
 # — inferred from the indexing below; confirm against accounts_recommender.
 accounts_recommend = recommenderObj.accounts_recommender(results)
 user_mentions = [ mention[0] for mention in accounts_recommend[0]]
 users_mentions_counts = [mention[1] for mention in accounts_recommend[0]]
 users = [user[0] for user in accounts_recommend[1]]
 users_counts = [user[1] for user in accounts_recommend[1]]

 return template("My first Tweet was: {{my_first_tweet_here}}, my generated text is {{generatedTweetHere}}"
                 "Your Topic is: {{tweets_topic}}."
                 "Here are some accounts that you may interested in:"
                 "Among all the tweets:"
                 "@{{first_mention}} was mentioned {{first_metion_count}} times."
                 "@{{second_mention}} was mentioned {{second_metion_count}} times."
                 "@{{third_mention}} was mentioned {{third_metion_count}} times."
                 "{{first_user_count}} of @{{first_user}}'s tweets are about {{tweets_topic}}."
                 "{{second_user_count}} of @{{second_user}}'s tweets are about {{tweets_topic}}."
                 "{{third_user_count}} of @{{third_user}}'s tweets are about {{tweets_topic}}."
                 ,
                 my_first_tweet_here = my_first_tweet,
                 generatedTweetHere = generatedTweet,
                 tweets_topic = topic,
                 first_mention = user_mentions[0],
                 second_mention = user_mentions[1],
                 third_mention = user_mentions[2],
                 first_metion_count = users_mentions_counts[0],
                 second_metion_count = users_mentions_counts[1],
                 third_metion_count = users_mentions_counts[2],
                 first_user_count = users_counts[0],
                 second_user_count = users_counts[1],
                 third_user_count = users_counts[2],
                 first_user = users[0],
                 second_user = users[1],
                 third_user = users[2])
Ejemplo n.º 31
0
def run_strategy(cfg, user):
    """Evaluate every (weight, bm25_k1) scheme over all sample proportions.

    For each combination, repeatedly hold out a random sample of the
    user's package profile, recommend from the remainder, and log recall
    plus precision/F1/F05 summaries under results/strategies/.

    Relies on module-level ``weighting``, ``sample_proportions`` and
    ``iterations``; mutates ``cfg`` in place.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Copy the full profile scores, then move a random sample
                # out; the remainder is the observed iteration profile.
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # FIX: random.choice() needs an indexable sequence;
                    # dict.keys() is a non-indexable view on Python 3.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(label, n, sample, recommendation,
                                 profile_size, repo_size, log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)
            with open(log_file, 'w') as f:
                # Averages at fixed recommendation size 10.
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n" %
                    (results.best_precision()[0], results.best_precision()[1],
                     results.best_f1()[0], results.best_f1()[1],
                     results.best_f05()[0], results.best_f05()[1]))
                f.write(
                    "# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)"
                    %  # noqa
                    (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
Ejemplo n.º 32
0
    def show_ratings(self,_args,ObannonsBeerDict):
        """Render the ratings page with recommendations derived from the
        beer ratings submitted in ``_args``."""
        # Each form entry maps a beer id to a list whose first element is
        # the rating value.
        user_ratings = [
            {'BeerId': beer_id, 'Rating': float(_args[beer_id][0])}
            for beer_id in _args
        ]
        db = utils.connect_db('Two_Pick_Too_Drunk')

        engine = Recommender()
        results, result_set = engine.recommender(
            user_ratings, 'obannons_reviews', 'obannons_reviews_cluster',
            db, 0, 15)
        self.render("ratings.html", OBD=ObannonsBeerDict,
                    results=results, result_set=result_set)
Ejemplo n.º 33
0
    def show_ratings(self, _args, ObannonsBeerDict):
        """Render the ratings page with recommendations derived from the
        beer ratings submitted in ``_args``."""
        # Each form entry maps a beer id to a list whose first element is
        # the rating value.
        user_ratings = [
            {"BeerId": beer_id, "Rating": float(_args[beer_id][0])}
            for beer_id in _args
        ]
        db = utils.connect_db("Two_Pick_Too_Drunk")

        engine = Recommender()
        results, result_set = engine.recommender(
            user_ratings, "obannons_reviews", "obannons_reviews_cluster", db)
        self.render("ratings.html", OBD=ObannonsBeerDict, results=results,
                    result_set=result_set)
Ejemplo n.º 34
0
def run_strategy(cfg, user):
    """Evaluate every (weight, bm25_k1) scheme over all sample proportions.

    For each combination, repeatedly hold out a random sample of the
    user's package profile, recommend from the remainder, and log recall
    plus precision/F1/F05 summaries under results/strategies/.

    Relies on module-level ``weighting``, ``sample_proportions`` and
    ``iterations``; mutates ``cfg`` in place.
    """
    for weight in weighting:
        cfg.weight = weight[0]
        cfg.bm25_k1 = weight[1]
        rec = Recommender(cfg)
        repo_size = rec.items_repository.get_doccount()
        for proportion in sample_proportions:
            results = ExperimentResults(repo_size)
            label = get_label(cfg, proportion)
            log_file = "results/strategies/" + label["values"]
            for n in range(iterations):
                # Copy the full profile scores, then move a random sample
                # out; the remainder is the observed iteration profile.
                profile_size = len(user.pkg_profile)
                item_score = {}
                for pkg in user.pkg_profile:
                    item_score[pkg] = user.item_score[pkg]
                sample = {}
                sample_size = int(profile_size * proportion)
                for i in range(sample_size):
                    # FIX: random.choice() needs an indexable sequence;
                    # dict.keys() is a non-indexable view on Python 3.
                    key = random.choice(list(item_score))
                    sample[key] = item_score.pop(key)
                iteration_user = User(item_score)
                recommendation = rec.get_recommendation(
                    iteration_user, repo_size)
                write_recall_log(
                    label, n, sample, recommendation, profile_size, repo_size,
                    log_file)
                if hasattr(recommendation, "ranking"):
                    results.add_result(recommendation.ranking, sample)
            with open(log_file, 'w') as f:
                # Averages at fixed recommendation size 10.
                precision_10 = sum(results.precision[10]) / len(
                    results.precision[10])
                f1_10 = sum(results.f1[10]) / len(results.f1[10])
                f05_10 = sum(results.f05[10]) / len(results.f05[10])
                f.write("# %s\n# %s\n\ncoverage %d\n\n" %
                        (label["description"], label["values"],
                         recommendation.size))
                f.write("# best results (recommendation size; metric)\n")
                f.write(
                    "precision (%d; %.2f)\nf1 (%d; %.2f)\nf05 (%d; %.2f)\n\n" %
                    (results.best_precision()[0], results.best_precision()[1],
                     results.best_f1()[0], results.best_f1()[1],
                     results.best_f05()[0], results.best_f05()[1]))
                f.write("# recommendation size 10\nprecision (10; %.2f)\nf1 (10; %.2f)\nf05 (10; %.2f)" %  # noqa
                        (precision_10, f1_10, f05_10))
            precision = results.get_precision_summary()
            recall = results.get_recall_summary()
            f1 = results.get_f1_summary()
            f05 = results.get_f05_summary()
            accuracy = results.get_accuracy_summary()
            plot_summary(precision, recall, f1, f05, accuracy, log_file)
Ejemplo n.º 35
0
    def train(self):
        """Train the recommender from viewed-together and bought-together
        interaction data and persist the fitted result via write_data().

        Pipeline: read the four raw datasets, normalize product columns,
        explode views (per session) and purchases (per customer) into
        co-occurrence tuple lists per attribute, then fit the Recommender.
        """
        viewed_together_data = self.read_data(
            self.data_paths[self.config.VIEWED_TOGETHER])
        bought_together_data = self.read_data(
            self.data_paths[self.config.BOUGHT_TOGETHER])
        all_products_data = self.read_data(
            self.data_paths[self.config.ALL_PRODUCTS])
        price_list_data = self.read_data(
            self.data_paths[self.config.PRICE_LIST])
        """getting some columns in lower case"""
        transformed_all_products_data = uniform_data(all_products_data,
                                                     self.product_attributes)
        """explode the lists into tuples of combinations per session ID for views and per user in bought"""
        print(
            "For the view Dataframe breaking lists of brands, product categories, product_types "
            "into permutations of brands, product categories, product_types as a list of tuples"
        )
        # Views are grouped per session (SID_IDX).
        viewed_together_cols, group_by_col = [
            'SID_IDX', 'CONFIG_ID', 'PRODUCT_CATEGORY', 'PRODUCT_TYPE', 'BRAND'
        ], 'SID_IDX'
        (tuple_list_viewed_brand, tuple_list_viewed_product_category,
         tuple_list_viewed_product_type,
         tuple_list_viewed_config) = self.transform_data(
             viewed_together_data, self.product_attributes,
             viewed_together_cols, group_by_col)

        print(
            "For the bought Dataframe breaking lists of brands, product categories, product_types "
            "into permutations of brands, product categories, product_types as a list of tuples"
        )

        # Purchases are grouped per customer (CUSTOMER_IDX).
        bought_together_cols, group_by_col = [
            'CUSTOMER_IDX', 'CONFIG_ID', 'PRODUCT_CATEGORY', 'PRODUCT_TYPE',
            'BRAND'
        ], 'CUSTOMER_IDX'

        (tuple_list_bought_brand, tuple_list_bought_product_category,
         tuple_list_bought_product_type,
         tuple_list_bought_config) = self.transform_data(
             bought_together_data, self.product_attributes,
             bought_together_cols, group_by_col)

        # Fit on all viewed/bought tuple lists; the second return value is
        # discarded here.
        recommender = Recommender()
        trained_data, _ = recommender.fit(
            tuple_list_viewed_brand, tuple_list_bought_brand,
            tuple_list_viewed_product_category,
            tuple_list_bought_product_category, tuple_list_viewed_product_type,
            tuple_list_bought_product_type, tuple_list_viewed_config,
            tuple_list_bought_config, transformed_all_products_data,
            price_list_data)
        self.write_data(trained_data)
Ejemplo n.º 36
0
def process():
    """Classify the article given via query parameters and return the
    matching category's recommendations as JSON."""
    headline = request.args.get('headline')
    description = request.args.get('short_description')
    query = headline + ' ' + description
    # The highest-scoring (index, score) pair selects the category bucket.
    scores = predictor.predictor(query)
    best_index = max(scores, key=lambda pair: pair[1])[0]
    category = categories[best_index]
    recommender = Recommender(query, dataset[category])
    results = recommender.return_results()
    print(results)
    return jsonify(results), 200
Ejemplo n.º 37
0
 def __init__(self, data, metric=DEFAULT_METRIC, model_size=50):
     """Build an item-based model: invert the user->item ratings into an
     item->user map, then load a cached similarity model from disk or
     learn and persist a fresh one.

     FIX: dict.iteritems() is Python 2-only; .items() behaves identically
     there and also works on Python 3.
     """
     self.items = defaultdict(dict)
     for user, ratings in data.user_ratings.items():
         for item, rating in ratings.items():
             self.items[item][user] = rating
     Recommender.__init__(self, self.items, metric)
     self.data = data
     self.model_size = model_size

     # Cached item -> similar-items map, loaded from disk when available.
     self.item_similars = {}
     if exists(data.item_based_model_path):
         self.load_model(data.item_based_model_path)
     else:
         self.learn_model(data.item_based_model_path)
Ejemplo n.º 38
0
class AppRecommender:
    """web.py handler: build package recommendations from an uploaded
    package list (popularity-contest output or a plain package list)."""

    def __init__(self):
        logging.info("Setting up AppRecommender...")
        self.cfg = Config()
        self.rec = Recommender(self.cfg)
        # Each request gets its own temporary directory under requests_dir.
        self.requests_dir = "/var/www/AppRecommender/src/web/requests/"
        if not os.path.exists(self.requests_dir):
            os.makedirs(self.requests_dir)

    def POST(self):
        """Handle an upload: derive a user profile from the file and render
        up to 12 recommended packages, or an error page on failure."""
        web_input = web.input(pkgs_file={})
        user_dir = tempfile.mkdtemp(prefix='', dir=self.requests_dir)
        user_id = user_dir.split("/")[-1]
        uploaded_file = os.path.join(user_dir, "uploaded_file")
        # FIX: an empty upload previously fell through to open() on a file
        # that was never written, raising IOError; fail with the error page.
        if not web_input['pkgs_file'].value:
            return render.error(
                [
                    "Could not extract profile from uploaded file. It must have at least 10 applications."
                ],  # noqa
                "/",
                "RECOMMENDATION")
        lines = web_input['pkgs_file'].file.readlines()
        with open(uploaded_file, "w") as uploaded:
            uploaded.writelines(lines)
        with open(uploaded_file) as uploaded:
            # popularity-contest logs start with a well-known header line;
            # anything else is treated as a plain package list.
            if uploaded.readline().startswith('POPULARITY-CONTEST'):
                user = PopconSystem(uploaded_file, user_id)
            else:
                user = PkgsListSystem(uploaded_file, user_id)
        if len(user.pkg_profile) < 10:
            return render.error(
                [
                    "Could not extract profile from uploaded file. It must have at least 10 applications."
                ],  # noqa
                "/",
                "RECOMMENDATION")
        self.rec.set_strategy("knn_eset")
        user.maximal_pkg_profile()
        prediction = self.rec.get_recommendation(user, 12).get_prediction()
        logging.info("Prediction for user %s" % user.user_id)
        logging.info(str(prediction))
        recommendation = [result[0] for result in prediction]
        pkgs_details = []
        for pkg_name in recommendation:
            logging.info("Getting details of package %s" % pkg_name)
            pkg = DebianPackage(pkg_name)
            pkg.load_summary()
            pkgs_details.append(pkg)
        if pkgs_details:
            logging.info("Rendering recommendation...")
            return render.apprec(pkgs_details)
        return render.error(
            ["No recommendation produced for the uploaded file."], "/",
            "RECOMMENDATION")  # noqa
def evaluate_accuracy(tdata):
    """Compute the top-3 hit-rate accuracy of the recommender on test data.

    ``tdata`` is preprocessed into a sparse feature matrix, the true
    labels and the user list; a hit is counted when a user's true label
    appears among their top-3 recommendations.
    """
    testX, testy, users = testpreprocess(tdata)
    rc = Recommender(testX)
    top3 = [rc.recommend(user, number=3) for user in users]
    hits = sum(1 for idx, truth in enumerate(testy) if truth in top3[idx])
    return hits / len(testy)
Ejemplo n.º 40
0
    def get_Recommendations(self):
        """Compute (and time) recommendations for the current user's rated
        beers against the reviewer clusters.

        NOTE(review): ``results``/``result_set`` are computed but never
        returned or stored — confirm whether recommender() persists them
        as a side effect. Python 2 module (bare print statements).
        """
        start_time = time.time()
        print "starting"
        user= self.get_current_user()

        db = utils.connect_db('Two_Pick_Too_Drunk')

        # Collection names: raw reviews and their cluster assignments.
        reviews = 'reviewer'
        clusters = 'reviewer_cluster'

        recommenderer = Recommender()
        (results,result_set) = recommenderer.recommender(user.Beers_Rated, reviews, clusters, db)
        end_time = time.time()
        print 'done with updation after %.3f seconds'%(end_time-start_time)
Ejemplo n.º 41
0
class TestRecommender(TestCase):
    """Integration test: recommend for a known user id against a live
    Mongo-backed data source (Python 2 — bare print statement below)."""

    def setUp(self):
        super(TestRecommender, self).setUp()
        # Build the data source and persist its feature bounds before
        # wiring the recommender client to the server.
        mongo_source = MongoDataSource(MONGO_URL, FEATURE_FILE)
        mongo_source.save_bounds()
        self.data_source = mongo_source
        # print self.data_source.list_users()
        self.client = Recommender(SERVER_URL, mongo_source)

    def tearDown(self):
        super(TestRecommender, self).tearDown()
        # Remove the feature file written by save_bounds() in setUp.
        remove(FEATURE_FILE)

    def test_recommend_exist(self):
        # NOTE(review): no assertion — this only prints the recommendation
        # for an existing user id; consider asserting on the result.
        print self.client.recommend('54d3365bdb9eb83ba8ff36f3')
Ejemplo n.º 42
0
def main(args):
    """Score the saved course-similarity model against personal timetables.

    Aggregates removal-recovery scores by timetable length and pickles the
    per-length averages to ``recommender.<school>.scores``.
    """
    start_time = time.time()
    simfcn = 'cosine'
    similarities = pickle.load(open("jhu.recommended.model", "rb"))
    recommender = Recommender(args.school, simfcn)

    # Either all timetables for the semester, or only those belonging to
    # students of the requested major.
    ptts = []
    if args.action == "all":
        ptts = PersonalTimetable.objects.filter(school=args.school, semester=Semester.objects.filter(name=args.semester, year=args.year))
    else:
        print(args.action)
        major_students = Student.objects.filter(major=args.action)
        ptts = PersonalTimetable.objects.filter(school=args.school, semester=Semester.objects.filter(name=args.semester, year=args.year), student__in=major_students)
    scores = {}
    num_timetables = {}
    for ptt in ptts:
        # FIX: map() returns an iterator on Python 3, so len() below would
        # raise TypeError; materialize the course ids as a list.
        course_ids = [c.id for c in ptt.courses.all()]
        length = len(course_ids)
        if length < args.num_remove + 1:
            continue
        s = score(recommender, course_ids, similarities, args.num_remove)
        if length not in scores:
            scores[length] = np.zeros(args.num_remove)
            num_timetables[length] = 0
        scores[length] += s
        num_timetables[length] += 1
    print(num_timetables)

    # Average the accumulated scores per timetable length.
    for length in scores:
        scores[length] /= float(num_timetables[length])
    print(scores)
    pickle.dump(scores, open('recommender.' + args.school + '.scores', "wb"))
Ejemplo n.º 43
0
def get_recommendations(url):
    """Resolve a Safari recommendation URL for the page at ``url``.

    Returns ``(True, recommendation_url)`` on success, or
    ``(False, error_details)`` when the page contents cannot be fetched.
    """
    recommend_api = 'http://chat-01.heron.safaribooks.com/chat/by-popularity?start=1&topic='
    recommended_url = 'http://www.safariflow.com/library/view/_/{fpid}/{chunk}'
    success, result = Recommender.get_url_page_contents(url)
    if not success:
        return False, result
    # Derive a topic from the page's tags, then ask the recommendation
    # API for the most popular match.
    tags = Recommender.getTags(result)
    topic = Recommender.matchTopic(tags)
    api_response = Recommender.fetchSafariRecommendation(recommend_api + topic)
    return True, recommended_url.format(**api_response)
Ejemplo n.º 44
0
class GitBook():
    """Facade over the project Recommender / ResourceManager
    (Python 2 — bare print statement in init)."""

    def init(self):
        # NOTE(review): named ``init``, not ``__init__`` — it is NOT run by
        # the constructor; callers must invoke it explicitly. Confirm this
        # is intended.
        self.recommender = Recommender()
        self.rm = ResourceManager()
        self.recommender.build_project_features()
        print 'Launched GitBook instance'

    def get_languages(self):
        # Languages known to the underlying recommender.
        return self.recommender.get_languages()

    def get_areas_of_interest(self):
        # Areas of interest ("aoi") known to the underlying recommender.
        return self.recommender.get_aoi()

    def recommend_projects(self, languages, area_interest, difficulty):
        # Straight delegation to the underlying recommender.
        return self.recommender.recommend_projects(languages, area_interest, difficulty)
Ejemplo n.º 45
0
def getRecServerSocket(port):
    """Open a buffered Thrift connection to the recommender ("block")
    server on localhost:``port`` and return the client.

    Exits the process with status 1 if the connection cannot be opened.
    (Python 2 — bare print statement below.)
    """
    # This function creates a socket to block server and returns it

    # Make socket
    transport = TSocket.TSocket('localhost', port)
    transport.setTimeout(30000)  # timeout in milliseconds (30 s)
    # Buffering is critical. Raw sockets are very slow
    transport = TTransport.TBufferedTransport(transport)
    # Wrap in a protocol
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    # Create a client to use the protocol encoder
    client = Recommender.Client(protocol)

    # Connect!
    #print "Connecting to block server on port", port
    try:
        transport.open()
    except Exception as e:
        print e
        log.error(
            "Exception while connecting to block server, check if server is running on port: {}"
            .format(port))
        transport.close()
        exit(1)

    return client
Ejemplo n.º 46
0
 def reset(self, params, rep):
     """Prepare the recommender, user profile and sample size for a
     content-based experiment run; other strategy names are ignored."""
     if not params['name'].startswith("content"):
         return
     cfg = Config()
     # Reuse the pre-built application index rather than re-indexing
     # (see AppAptXapianIndex).
     cfg.axi = "data/AppAxi"
     cfg.index_mode = "old"
     cfg.weight = params['weight']
     self.rec = Recommender(cfg)
     self.rec.set_strategy(params['strategy'])
     self.repo_size = self.rec.items_repository.get_doccount()
     self.user = LocalSystem()
     self.user.app_pkg_profile(self.rec.items_repository)
     self.user.no_auto_pkg_profile()
     # Number of profile packages to hold out per repetition.
     self.sample_size = int(len(self.user.pkg_profile) * params['sample'])
Ejemplo n.º 47
0
def eval_model(parameters):
    """Build and score a Recommender from a hyperopt parameter sample,
    reporting the test score as the loss."""
    print("Parameters:")
    pprint(parameters)
    print()

    estimator = Recommender(
        useALS=True,
        useBias=True,
        rank=int(parameters['rank']),
        regParam=parameters['regParam'],
        lambda_1=parameters['lambda_1'],
        lambda_2=parameters['lambda_2'],
        lambda_3=0.0,
        userCol='user',
        itemCol='item',
        ratingCol='rating',
        nonnegative=False)

    train_score, test_score = score_model(estimator)
    return {'loss': test_score, 'status': hyperopt.STATUS_OK}
def retrieve_recommendations(alphas, removed_scores_file_path, users_wanted_recommendations):
    """For each alpha value, produce recommendations for every requested
    user and return them as ``{alpha: {...}}``.

    Python 2 module (bare print statements).
    """
    users_recommended = {}
    print "Initializing recommender..."
    recommender = Recommender("movie_matcher/new_oliwer_data.txt", removed_scores_file_path, 0.5)
    print "Recommender initialization done."

    for alpha in alphas:
        print "Current alfa: ", alpha
        recommender.set_alpha(alpha)
        users_recommended[alpha] = {}
        i = 0
        for user in users_wanted_recommendations.keys():
            # NOTE(review): the next line is garbled (looks like a redaction
            # artifact "******"); the per-user recommendation call and the
            # rest of the loop body are missing — restore from VCS history.
            print "Recommending for user: "******"Recommendations done."
    return users_recommended
Ejemplo n.º 49
0
    def test_cross_validation(self):
        """Smoke-test CrossValidation on a 10-package sample index
        (Python 2 — the result is only printed, never asserted)."""
        cfg = Config()
        axi = xapian.Database(cfg.axi)
        packages = ["gimp","aaphoto","eog","emacs","dia","ferret",
                    "festival","file","inkscape","xpdf"]
        path = "test_data/.sample_axi"
        sample_axi = SampleAptXapianIndex(packages,axi,path)
        rec = Recommender(cfg)
        # Point the recommender at the small sample index instead of the
        # full repository.
        rec.items_repository = sample_axi
        user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1})

        metrics = []
        metrics.append(Precision())
        metrics.append(Recall())
        metrics.append(F1())

        # Arguments presumably: 30% held out, 5 rounds, score threshold
        # 0.5 — TODO confirm against CrossValidation's signature.
        validation = CrossValidation(0.3,5,rec,metrics,0.5)
        validation.run(user)
        print validation
Ejemplo n.º 50
0
 def __init__(self):
     """Set up the survey: config, recommender, submissions directory and
     the list of strategy identifiers a participant can choose from."""
     logging.info("Setting up survey...")
     self.cfg = Config()
     self.rec = Recommender(self.cfg)
     # Survey submissions are written under this fixed web-root path.
     self.submissions_dir = "/var/www/AppRecommender/src/web/submissions/"
     if not os.path.exists(self.submissions_dir):
         os.makedirs(self.submissions_dir)
     self.strategies = ["cbh", "cbh_eset",
                        "knn", "knn_eset", "knn_plus",
                        "knnco"]
    def __init__(self):
        """Poll forever: recompute and store recommendations for every user
        whose beer ratings changed since the previous pass.

        Python 2 module (bare print statement); runs an infinite loop with
        a 10-second sleep between passes.
        """
        db = utils.connect_db('Two_Pick_Too_Drunk')
        collection = db['user']
        # Initial low-water mark; advanced to "now" after each pass.
        last_update_start = datetime(2012, 12, 6)
        reviews = 'reviewer'
        clusters = 'reviewer_cluster'

        recommenderer = Recommender()
        while 1:
            users  = collection.find({"last_beer_update":{"$gte":last_update_start}})
            start_time = time.time()
            for user in users:
                (results,result_set) = recommenderer.recommender(user["Beers_Rated"], reviews, clusters, db)
                collection.update({"_id": user["_id"]}, {"$set": {"Recommendations": results}})
            end_time = time.time()
            print 'done with updation after %.3f seconds'%(end_time-start_time)

            last_update_start = datetime.now()
            time.sleep(10)
Ejemplo n.º 52
0
def run_strategy(cfg, user):
    """Run the ROC-suite experiment for one user and strategy.

    Repeatedly holds out a random 90% of the user's package profile,
    recommends from the remaining 10%, then writes ROC/summary logs and
    plots under results/roc-suite/<user>/<strategy>/.
    """
    rec = Recommender(cfg)
    repo_size = rec.items_repository.get_doccount()
    results = ExperimentResults(repo_size)
    label = get_label(cfg)
    user_dir = ("results/roc-suite/%s/%s" % (user.user_id[:8], cfg.strategy))
    if not os.path.exists(user_dir):
        os.makedirs(user_dir)
    log_file = os.path.join(user_dir, label["values"])
    for n in range(iterations):
        # Copy the profile scores, then move a random 90% sample out; the
        # remainder is the observed profile for this iteration.
        profile_len = len(user.pkg_profile)
        item_score = {}
        for pkg in user.pkg_profile:
            item_score[pkg] = user.item_score[pkg]
        sample = {}
        sample_size = int(profile_len * 0.9)
        for i in range(sample_size):
            # FIX: random.choice() needs an indexable sequence; dict.keys()
            # is a non-indexable view on Python 3.
            key = random.choice(list(item_score))
            sample[key] = item_score.pop(key)
        iteration_user = User(item_score)
        recommendation = rec.get_recommendation(iteration_user, repo_size)
        write_recall_log(
            label, n, sample, recommendation, profile_len, repo_size, log_file)
        if hasattr(recommendation, "ranking"):
            results.add_result(recommendation.ranking, sample)
    with open(log_file + "-roc.jpg.comment", 'w') as f:
        f.write("# %s\n# %s\n\n" %
                (label["description"], label["values"]))
        f.write("# roc AUC\n%.4f\n\n" % results.get_auc())
        f.write("# threshold\tprecision\trecall\t\tf05\t\tcoverage\n")
        for size in results.thresholds:
            f.write("%4d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\n" %
                    (size, numpy.mean(results.precision[size]),
                     numpy.mean(results.recall[size]),
                     numpy.mean(results.f05[size]),
                     numpy.mean(results.coverage(size))))
    # The same comment file accompanies the linear- and log-scale plots.
    shutil.copy(log_file + "-roc.jpg.comment", log_file + ".jpg.comment")
    shutil.copy(log_file + "-roc.jpg.comment",
                log_file + "-logscale.jpg.comment")
    plot_roc(results, log_file)
    plot_summary(results, log_file)
Ejemplo n.º 53
0
 def __init__(self, valid_repos_list, pm_dir, assessments_dir):
     """Initialize the evaluator: per-user positives, output directories
     and the baseline recommendations loaded from the first results file."""
     logging.basicConfig(filename="AvgUserSrdp_performance_"+str(time.time())+".log",level=logging.DEBUG)
     self.valid_repos_list = valid_repos_list
     # NU = number of users; one total-positives count per user.
     self.NU = len(self.valid_repos_list)
     self.total_positives = np.array([len(a) for a in valid_repos_list])
     self.predicted_positives = np.zeros(self.NU)
     self.out_dir = assessments_dir
     self.fileno = 0
     self.pm_dir = pm_dir
     # Load the run-0 recommendations as the comparison baseline.
     r_fn = Runner.run_results_fn(pm_dir, 0)
     self.pm_repos_list = Recommender.load_recommendations(r_fn)
Ejemplo n.º 54
0
def read_stream():
    """Listen to the bot's Twitter user stream and answer every status
    that mentions the bot with a recommendation."""
    stream = TwitterStream(
        auth=OAuth(Config.access_token, Config.access_token_secret,
                   Config.api_key, Config.api_secret),
        domain='userstream.twitter.com')

    for msg in stream.user():
        logging.info(msg)

        # Only status updates carry an "entities" payload.
        if "entities" not in msg:
            continue
        mentioned = any(
            mention["screen_name"] == Config.name.replace("@", "")
            for mention in msg["entities"]["user_mentions"])
        if mentioned:
            user_id = UserDao.add_user(msg["user"]["screen_name"],
                                       msg["user"]["id"])
            UserTweetDao.create_user_tweet(user_id, msg["id"], msg["text"], msg)
            Recommender.get_recommendation()
            distribute_recommendations()
Ejemplo n.º 55
0
def do_recommendation(tweet, keyword_list="", delete_fails=False):
    """Persist the tweet (unless already seen), trigger a recommendation
    run, and return the number of recommendations distributed."""
    # TODO only persist if there is a recommendation?
    user = UserDao.add_user(tweet["user"]["screen_name"], tweet["user"]["id"])
    nr_distributed = 0

    if not UserTweetDao.is_existing_user_tweet(tweet["id"]):
        # Prefer caller-supplied keywords over the raw tweet text.
        tweet_text = keyword_list if len(keyword_list) > 0 else tweet["text"]
        UserTweetDao.create_user_tweet(user.id, tweet["id"], tweet_text, tweet)
        Recommender.get_recommendation()
        nr_distributed = distribute_recommendations()
        # NOTE: deleting users whose distribution failed (delete_fails) is
        # not implemented yet.

    return nr_distributed
Ejemplo n.º 56
0
def run():
    """Sample synthetic users from the recommender, compare each user's
    aggregated song tags with those of the recommendations, and plot
    error histograms.

    Python 2 module (bare print statements); config path comes from
    sys.argv[1].
    """
    mse_hist = []
    mde_hist = []
    samples = 100
    # NOTE(review): yaml.load without an explicit Loader can execute
    # arbitrary tags; prefer yaml.safe_load for untrusted config files.
    config_dict = yaml.load(open(sys.argv[1], 'r'))
    uniq = config_dict['uniq_map_file']
    runiq = config_dict['runiq_map_file']
    path = config_dict['pickle_dir']
    rec = Recommender(path, uniq, runiq)
    for sample in range(samples):
        ch = []
        # NOTE(review): these lists are re-created on every iteration, so
        # the averages printed after the loop only reflect the final
        # sample — confirm whether they should accumulate across samples.
        tags_in_agg1 = []
        tags_in_agg2 = []
        user = rec.generate_user()
        ch = rec.recommend(user, n=5)
        for fname in user:
            prettyprint_song(config_dict, fname)
        print user, ' --> ', ch
        print '-->'
        for fname in ch:
            prettyprint_song(config_dict, fname)
        agg1 = aggregate_tags(config_dict, user)
        agg2 = aggregate_tags(config_dict, ch)
        tags_in_agg1.append(len(agg1))
        tags_in_agg2.append(len(agg2))
        mse, mde = compare_aggregators(agg1, agg2)
        mde_hist.append(mde)
        mse_hist.append(mse)
        print "***************     SAMPLE %d" % (sample)

    _, (mse_plot, mde_plot) = plt.subplots(2)
    print float(sum(tags_in_agg1)) / len(tags_in_agg1)
    print float(sum(tags_in_agg2)) / len(tags_in_agg2)
    mse_plot.set_title("Mean Squared Error / Tags Hist")
    mde_plot.set_title("Manhattan Dist Error / Tags Hist")
    mse_plot.hist(mse_hist, bins=100)
    mde_plot.hist(mde_hist, bins=100)
    plt.savefig(config_dict['hist_path'])
Ejemplo n.º 57
0
 def reset(self, params, rep):
     """Prepare the recommender, user profile and sample size for a
     content-based experiment run; other strategy names are ignored."""
     if params['name'].startswith("content"):
         cfg = Config()
         #if the index was not built yet
         #app_axi = AppAptXapianIndex(cfg.axi,"results/arnaldo/AppAxi")
         cfg.axi = "data/AppAxi"
         cfg.index_mode = "old"
         cfg.weight = params['weight']
         self.rec = Recommender(cfg)
         self.rec.set_strategy(params['strategy'])
         self.repo_size = self.rec.items_repository.get_doccount()
         self.user = LocalSystem()
         self.user.app_pkg_profile(self.rec.items_repository)
         self.user.no_auto_pkg_profile()
         # Number of profile packages to hold out per repetition.
         self.sample_size = int(len(self.user.pkg_profile)*params['sample'])
Ejemplo n.º 58
0
def main():
    """Demo: build a travel recommendation for a fixed time window and
    collect it into a per-day dict of spot details."""
    recom = Recommender()
    recom.setTimeInterval(datetime(2016, 4, 30, 8), datetime(2016, 5, 1, 18))
    recom.setHabit({"0": 90, "1": 40, "2": 50})
    recom.setBudget(1)  # allowed budget levels: 1, 2, 3
    travelList = recom.recommend()
    print(travelList)

    travelDict = {}
    for day, spots in enumerate(travelList, start=1):
        travelDict[day] = {
            spot.name: {"coord": spot.coord, "popularity": spot.popularity,
                        "priceLevel": spot.priceLevel, "topic": spot.topicList,
                        "order": spot.order}
            for spot in spots
        }
Ejemplo n.º 59
0
    def post(self):
        """Handle a travel-planning request: parse the JSON body, run the
        recommender, and write the per-day itinerary back as JSON."""
        json_obj = json_decode(self.request.body)
        print('Post data received')

        for key in list(json_obj.keys()):
            print('key: %s , value: %s' % (key, json_obj[key]))

        # Schedule boundaries arrive as 'YYYY-MM-DD' dates plus hour fields.
        start_date = json_obj['startDate'].split('-')
        end_date = json_obj['endDate'].split('-')
        startTime = datetime(int(start_date[0]), int(start_date[1]),
                             int(start_date[2]), int(json_obj['startTime']))
        endTime = datetime(int(end_date[0]), int(end_date[1]),
                           int(end_date[2]), int(json_obj['endTime']))

        recom = Recommender()
        recom.setTimeInterval(startTime, endTime)
        recom.setHabit(json_obj['like'])
        recom.setBudget(json_obj['Budget'])  # allowed budget levels: 1, 2, 3
        travelList = recom.recommend()

        travelDict = {}
        for day, spots in enumerate(travelList, start=1):
            # Weather lookup is currently disabled; the client receives an
            # empty icon URL for every spot.
            travelDict[day] = {
                spot.name: {"coord": spot.coord,
                            "popularity": spot.popularity,
                            "priceLevel": spot.priceLevel,
                            "topic": spot.topicList,
                            "order": spot.order,
                            "ref": spot.ref,
                            "weather": ''}
                for spot in spots
            }
        self.write(json.dumps(travelDict))
Ejemplo n.º 60
0
class AppRecommender:
    """Produce application recommendations for the local system user."""

    def __init__(self):
        self.recommender = Recommender()

    def make_recommendation(self, recommendation_size,
                            no_auto_pkg_profile=False):
        """Return a recommendation of ``recommendation_size`` items for the
        local user, logging timing information around the computation."""
        begin_time = datetime.datetime.now()
        logging.info("Computation started at %s" % begin_time)

        user = self.get_user(no_auto_pkg_profile)
        recommendation = self.recommender.get_recommendation(
            user, recommendation_size)

        logging.info("Recommending applications for user %s" % user.user_id)
        logging.info(recommendation)

        end_time = datetime.datetime.now()
        logging.info("Computation completed at %s" % end_time)
        elapsed = end_time - begin_time
        logging.info("Time elapsed: %d seconds." % elapsed.seconds)

        return recommendation

    def get_user(self, no_auto_pkg_profile):
        """Build the local-system user, restricted to desktop applications
        and reduced to its maximal package profile."""
        config = Config()

        user = LocalSystem()
        user.filter_pkg_profile(
            os.path.join(config.filters_dir, "desktopapps"))
        user.maximal_pkg_profile()

        if no_auto_pkg_profile:
            user.no_auto_pkg_profile()

        return user