예제 #1
0
 def get(self, bibcode):
     """Request recommendations for ``bibcode``; report failures as HTTP 500.

     Arguments:
         bibcode -- identifier passed straight to ``get_recommendations``

     Returns:
         On failure, a ``(payload, 500)`` tuple whose payload carries the
         error text under ``'msg'``.

     NOTE(review): the visible code has no success-path return (``stime``
     and ``results`` are not used after assignment), so a successful call
     returns None here -- the excerpt may be truncated; confirm against
     the full handler.
     """
     stime = time.time()
     try:
         results = get_recommendations(bibcode)
     # ``except X as err`` is valid on Python 2.6+ and 3.x; the former
     # ``except X, err`` form is a SyntaxError on Python 3.
     except Exception as err:
         current_app.logger.error('Recommender exception (%s): %s'%(bibcode, err))
         return {'msg': 'Unable to get results! (%s)' % err}, 500
예제 #2
0
def run_recommender():
    """
    Recommender page.

    Reads the query parameters ``movie_1`` .. ``movie_5``, keeps the non-empty
    ones, and renders ``recommendation.html`` with the recommended movies and
    the database titles each input was interpreted as. When every input is
    empty, a single fixed fallback recommendation is shown instead.
    """
    n_recommendations = 5

    # Collect the non-empty user inputs in slot order.
    selections = []
    for slot in range(1, 6):
        title = request.args[f'movie_{slot}']
        if title != "":
            selections.append(title)

    if len(selections) > 0:
        movies, interpreted_choices = recommender.get_recommendations(
            selections, RATING_MATRIX, MODEL, n_recommendations)
        interpretation_message = "Based on our interpretation of your choices"
    else:
        # Nothing to base a recommendation on: fall back to a fixed title.
        movies = ["Parasite"]
        interpreted_choices = []
        interpretation_message = ""

    context = dict(
        title="Your recommendations:",
        movies=movies,
        interpreted_choices=interpreted_choices,
        interpretation_message=interpretation_message,
        user_input=selections,
        inputs_to_interpretations=list(zip(selections, interpreted_choices)),
    )
    return render_template('recommendation.html', **context)
def evaluation_memory_based_user(matrix, similarities, neighbours,
                                 masked_items, user, n):
    """Compute Precision@N and Recall@N for a specific user.

    Arguments:
        matrix {ndarray} -- User-Item Matrix
        similarities {ndarray} -- User Similarity Matrix
        neighbours {ndarray} -- List of N Neighbours for each User in User-Item Matrix
        masked_items {ndarray} -- 2-D array of masked entries; column 0 is
            compared against ``user`` and column 1 is read as the item
        user {int} -- Index Number of User
        n {int} -- Number of Recommendations to be returned / measured by

    Returns:
        tuple -- ``(precision, recall)``. Recall is 0.0 when the user has no
        masked items and precision is 0.0 when ``n`` is 0, instead of
        raising ZeroDivisionError.
    """
    # calculate recommendations
    rec = get_recommendations(matrix, similarities, neighbours, user, n)
    recommended = set(rec['item'])

    # items that were masked for this user (rows whose first column is the user)
    user_rows = masked_items[:, 0] == user
    items_masked = list(masked_items[user_rows, 1])

    # number of recommended items that are among the masked ones
    hits = len(recommended.intersection(items_masked))

    recall = hits / len(items_masked) if items_masked else 0.0
    precision = hits / n if n else 0.0

    return precision, recall
예제 #4
0
 def get(self, bibcode):
     """Request recommendations for ``bibcode``; mask failures from the UI.

     Arguments:
         bibcode -- identifier passed straight to ``get_recommendations``

     Returns:
         On failure, ``({'Error': 'Unable to get results!'}, 200)``; the
         exception is logged but deliberately not surfaced as an error status.

     NOTE(review): the visible code has no success-path return (``stime``
     and ``results`` are not used after assignment), so a successful call
     returns None here -- the excerpt may be truncated; confirm against
     the full handler.
     """
     stime = time.time()
     try:
         results = get_recommendations(bibcode)
     # ``except X as err`` is valid on Python 2.6+ and 3.x; the former
     # ``except X, err`` form is a SyntaxError on Python 3.
     except Exception as err:
         current_app.logger.error('Recommender exception (%s): %s'%(bibcode, err))
         # If the request for recommendations fails, we just want the UI to ignore and display nothing
         return {'Error': 'Unable to get results!'}, 200
예제 #5
0
def get_recommendations(id):
    """
    Controller for the recommendations route.

    Prints the requested product ID to stdout and returns whatever
    ``recommender.get_recommendations`` produces for it.

    Arguments:
        id {int} -- Product ID
    """
    print("Product ID: {}".format(id))
    recommendations = recommender.get_recommendations(id)
    return recommendations
예제 #6
0
def post():
    """
    Turn the posted movie ratings into a JSON list of recommended movie ids.

    Each item of the request body is read for its ``imdb_id`` and ``rating``
    keys. Leading ``t`` characters and then leading zeros are stripped from
    the id before it is handed to ``get_recommendations``; every returned
    movie is formatted as ``'tt' + pad(movie)``.

    NOTE(review): with ``silent=True`` Flask documents ``get_json`` as
    returning None for a missing or invalid JSON body, in which case the loop
    below would raise TypeError -- confirm callers always send valid JSON.
    """
    req_data = request.get_json(silent=True)

    print(req_data)

    ratings = []
    for item in req_data:
        movie_id = item['imdb_id'].lstrip('t').lstrip('0')
        ratings.append((movie_id, item['rating']))

    recommendations = get_recommendations(ratings)

    return json.dumps([{'id': 'tt' + pad(movie)} for movie in recommendations])
예제 #7
0
def recommend():
    """
    Return recommendations for the ``name`` query parameter as JSON.

    The optional ``k`` query parameter is converted to an int and incremented
    by one before being passed on; when it is absent or empty, ``None`` is
    passed instead.
    """
    name = request.args.get("name")

    raw_k = request.args.get("k")
    k = int(raw_k) + 1 if raw_k else None

    return jsonify(
        recommender.get_recommendations(name, recommender.cosine_sim, k))
예제 #8
0
def recommendations():
    """
    Render the recommendations page for the hard-coded user ``'user1'``.

    Reads every row the user's database handler returns, asks
    ``get_recommendations`` for recommended movie indexes (element ``[1]`` of
    its result) and keeps the entries of ``movies_list`` whose first field
    equals one of those indexes.
    """
    # TODO: for some reason the recommendations seem to always be the same,
    # or maybe the function is being used incorrectly.
    final_recommendations = []
    with DatabaseHandler('user1') as handler:
        movies_seen = handler.read_all_rows()
        recommended_ids = get_recommendations(movies_seen)[1]
        # From the movies currently listed, pick those whose index matches
        # one returned by get_recommendations (grouped by recommendation order).
        final_recommendations.extend(
            movies_list[j]
            for i in range(len(recommended_ids))
            for j in range(len(movies_list))
            if recommended_ids[i] == movies_list[j][0]
        )
    return render_template('recommendations.html',
                           title='Recommended',
                           movies=final_recommendations)
예제 #9
0
def get_movie():
    """
    Render ``result.html`` with recommendations for three rated movies.

    Reads ``movie1``..``movie3`` and ``rate1``..``rate3`` from the query
    string and passes them to ``recommender.get_recommendations`` as the three
    movies followed by the three ratings.
    """
    html_form_data = dict(request.args)

    movies = []
    rates = []
    for slot in (1, 2, 3):
        movies.append(html_form_data['movie%d' % slot])
        rates.append(html_form_data['rate%d' % slot])

    recomended_movies = recommender.get_recommendations(*(movies + rates))

    print(html_form_data)
    return render_template('result.html',
                           rec_movies=recomended_movies,
                           title='Movie Recomender')
예제 #10
0
def cross_validate(fold_length, k, metric='euclidean', heuristic=False):
    """
    Evaluate the recommender with K-fold cross validation.

    The data returned by ``NewUSsDataHandler().load_us_data()`` is split into
    ``fold_length`` folds. For every row of each test fold, recommendations
    are computed against the matching training fold and compared with the
    comma-separated ids stored in that row's ``TCs`` column.

    Arguments:
        fold_length -- number of folds handed to ``KFold(n_splits=...)``
        k -- forwarded unchanged to the recommender
            (presumably the neighbour/recommendation count -- TODO confirm)
        metric -- forwarded as ``distance_metric``
        heuristic -- when true, use ``get_recommendations_heuristcs``
            instead of ``get_recommendations``

    Returns:
        tuple -- ``(precisions, recalls, f_measures)``, three lists with one
        entry per evaluated test row; a row with no recommendations scores 0
        on all three.
    """
    uss = NewUSsDataHandler().load_us_data()
    kf = KFold(n_splits=fold_length)

    features = uss.drop(columns=['TCs'])
    targets = uss.loc[:, ['ID_US', 'TCs']]

    recommend = get_recommendations_heuristcs if heuristic else get_recommendations

    precisions, recalls, f_measures = [], [], []

    for train_index, test_index in kf.split(features):
        train_set = features.iloc[train_index, :]
        test_set = features.iloc[test_index, :]

        for _, us_row in test_set.iterrows():
            found = recommend(us_row, train_set, k, distance_metric=metric)

            if found.empty:
                precision = recall = f_measure = 0
            else:
                # Ground truth: the ids listed in this row's 'TCs' field.
                tc_lists = targets.loc[
                    targets['ID_US'] == us_row['ID_US'], 'TCs'].str.split(',')
                actual = pd.Series(tc_lists.iloc[0]).astype(float)
                suggested = pd.Series(found['ID']).astype(float)

                n_common = len(np.intersect1d(actual, suggested))
                recall = n_common / len(actual)
                precision = n_common / len(found)

                f_measure = 0
                if precision + recall > 0:
                    f_measure = calculate_fbeta(precision, recall, 2)

            f_measures.append(f_measure)
            precisions.append(precision)
            recalls.append(recall)

    return precisions, recalls, f_measures
예제 #11
0
def upload():
    """
    Handle a file upload and render the job recommendation table.

    On POST the uploaded file is validated, saved under the configured upload
    folder, cleaned with ``final_resume_clean`` and matched against
    ``cleaned_data.csv`` via ``get_recommendations``; the result is stored in
    the module-level ``df`` and rendered with ``recommendation.html``.

    Responses:
        redirect to the same URL -- no file part, or an empty filename
        400 (via ``abort``) -- file extension rejected by ``allowed_file``

    NOTE(review): on a request that skips the POST branch, ``df`` must already
    exist as a module global or the final render raises NameError -- confirm
    it is initialised elsewhere.
    """
    global df

    if request.method == 'POST':
        # Check if the post request has the file part
        if 'file' not in request.files:
            flash('No file attached in request')
            return redirect(request.url)
        file = request.files['file']

        # if user does not select file, browser also
        # submits an empty part without filename
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)

        if not allowed_file(file.filename):
            # abort() raises, so the flash/redirect that used to follow it
            # could never run; the effective behaviour (HTTP 400) is kept.
            abort(400, 'Incorrect file extension')

        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            file.save(
                os.path.join(BASE_DIR, app.config['UPLOAD_FOLDER'], filename))

            # NOTE(review): the file is saved under BASE_DIR but read back
            # through a path without BASE_DIR -- this presumably relies on the
            # working directory being BASE_DIR; confirm.
            resume_cleaned = final_resume_clean(
                os.path.join(app.config['UPLOAD_FOLDER'], filename), filename)
            jobs_df = pd.read_csv('cleaned_data.csv')

            df = get_recommendations(resume_cleaned, jobs_df)

            # Remove diacritics from original job descriptions
            custom_pipeline = [preprocessing.remove_diacritics]
            df['Description'] = hero.clean(df['Description'],
                                           pipeline=custom_pipeline)

    return render_template("recommendation.html",
                           column_names=df.columns.values,
                           target_column="Company",
                           hide_column="Job Description",
                           row_data=list(df.values.tolist()),
                           zip=zip)
예제 #12
0
def test_recommender(engine,
                     n_movies=5,
                     n_recommendations=5,
                     n_iterations=5000):
    """
    Perform a specified number of recommender iterations to see what the
    distribution of recommendations looks like
    Parameters:
        engine (sqlalchemy engine object): engine the session is bound to
        n_movies (int): number of movies to select for each iteration
        n_recommendations: number of recommendations that should be made in
                           each iteration
        n_iterations: number of recommendation iterations that should be
                      performed
    Returns
        pandas dataframe containing the randomly selected movies and
        corresponding recommendations for every iteration (an empty
        dataframe when n_iterations is 0)
    """
    session = sessionmaker(bind=engine)()

    _, _, rating_matrix, model =\
        recommender.prep_for_recommendations(fill_value='median')

    # Collect the per-iteration frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copies the whole
    # accumulated frame on every call.
    records = []
    for i in range(n_iterations):
        selections = pick_random_movies(session, n_movies)
        result, interpreted_choices = recommender.get_recommendations(
            selections, rating_matrix, model, n_recommendations)
        records.append(pd.DataFrame({
            "iteration": i,
            "user_selections": selections,
            "interpretations": interpreted_choices,
            "recommendations": result
        }))

    if not records:
        # pd.concat raises on an empty list; keep the old empty-frame result.
        return pd.DataFrame()

    return pd.concat(records)
예제 #13
0
            intersect_ds = pd.Series(np.intersect1d(real, rec_ids))

            recall = len(intersect_ds.index) / len(real.index)
            precision = len(intersect_ds.index) / len(recommendations.index)
            f_measure = 0
            if precision + recall > 0:
                f_measure = calculate_fbeta(precision, recall, 2)
            f_measures.append(f_measure)
            precisions.append(precision)
            recalls.append(recall)
    return precisions, recalls, f_measures


def calculate_fbeta(precision, recall, beta):
    """Return the F-beta score for the given precision and recall.

    Computes ``(1 + beta**2) * P * R / (beta**2 * P + R)``.

    Arguments:
        precision -- precision value
        recall -- recall value
        beta -- weight of recall relative to precision

    Returns:
        The F-beta score, or 0.0 when the denominator is zero (for example
        when precision and recall are both 0) instead of raising
        ZeroDivisionError.
    """
    beta_sq = beta ** 2
    denominator = beta_sq * precision + recall
    if denominator == 0:
        return 0.0
    return ((beta_sq + 1) * precision * recall) / denominator


if __name__ == '__main__':
    # Ad-hoc demo: recommend for one hand-written record against the loaded
    # data (with the 'TCs' column removed) and print the resulting table.
    data_handler = NewUSsDataHandler()
    uss = data_handler.load_us_data()
    uss = uss.drop(columns=['TCs'])

    newUS = {
        'ID_US': '#263',
        'Módulo': 'Cadastro',
        'Operação': 'Atualizar_dados',
        'Plataforma': 'Web',
        'RNFs': '1,2',
        'CAs': '5,6,7,8',
    }

    recommendations = get_recommendations(
        newUS, uss, 3, distance_metric='jaccard')

    print(recommendations.to_string())
    def test_everything(self):
        """Check that the whole recommendation pipeline works end to end."""
        from recommender import get_recommendations

        def recommended(bibcode):
            # Shape of one recommendation record for the given bibcode.
            return {
                "bibcode": bibcode,
                "author": u"au_%s,+" % bibcode,
                "title": u"ttl_%s" % bibcode,
            }

        # This test needs both the PostgreSQL mock and the overridden Solr
        # query (used by 'get_article_data'). The Solr response must carry
        # the references and citation counts of the papers found in the
        # co-reads; the documents below cover everything tested before.
        mockdata = [
            {"id": "1", "bibcode": "ppr1", "first_author": "au_ppr1", "title": ["ttl_ppr1"],
             "reference": ["r1", "r2"], "citation": ["c1"], "citation_count": 1},
            {"id": "2", "bibcode": "ppr2", "first_author": "au_ppr2", "title": ["ttl_ppr2"],
             "reference": ["r2", "r3"], "citation": ["c1", "c2", "c3"], "citation_count": 3},
            {"id": "3", "bibcode": "ppr3", "first_author": "au_ppr3", "title": ["ttl_ppr3"],
             "reference": ["r2", "r3"], "citation": ["c2", "c3"], "citation_count": 2},
            {"id": "4", "bibcode": "foo",
             "keyword_norm": ["aberration", "ablation", "absorption"]},
            {"id": "5", "bibcode": "paper_3", "first_author": "au_paper3",
             "title": ["ttl_paper_3"]},
            {"id": "6", "bibcode": "c1", "first_author": "au_c1", "title": ["ttl_c1"]},
            {"id": "7", "bibcode": "r2", "first_author": "au_r2", "title": ["ttl_r2"]},
        ]
        solr_body = """{
            "responseHeader":{
            "status":0, "QTime":0,
            "params":{ "fl":"reference,citation", "indent":"true",
            "wt":"json", "q":"*"}},
            "response":{"numFound":10456930,"start":0,"docs":%s
            }}""" % json.dumps(mockdata)
        httpretty.register_uri(
            httpretty.GET,
            self.app.config.get("RECOMMENDER_SOLR_PATH"),
            content_type="application/json",
            status=200,
            body=solr_body,
        )

        # Recommendations the mock data should produce, in order.
        expected_recommendations = {
            "paper": "a",
            "recommendations": [
                recommended(bibcode)
                for bibcode in ("ppr2", "ppr2", "ppr1", "c1", "r2", "ppr2")
            ],
        }

        # Generate the recommendations and do the final check.
        recommendations = get_recommendations("a")
        self.assertEqual(recommendations, expected_recommendations)
예제 #15
0
def find_movie(movies_seen):
    """Fetch recommendations for ``movies_seen``, print them and return them."""
    recommended = get_recommendations(movies_seen)
    print(recommended)
    return recommended
예제 #16
0
 def test_everything(self):
     '''Check that the whole recommendation pipeline works end to end'''
     from recommender import get_recommendations

     def recommended(bibcode):
         # Shape of one recommendation record for the given bibcode.
         return {'bibcode': bibcode,
                 'author': u'au_%s,+' % bibcode,
                 'title': u'ttl_%s' % bibcode}

     # This test needs both the PostgreSQL mock and the overridden Solr
     # query (used by 'get_article_data'). The Solr response must carry
     # the references and citation counts of the papers found in the
     # co-reads; the documents below cover everything tested before.
     mockdata = [
         {'id': '1', 'bibcode': 'ppr1', 'first_author': 'au_ppr1',
          'title': ['ttl_ppr1'], 'reference': ['r1', 'r2'],
          'citation': ['c1'], 'citation_count': 1},
         {'id': '2', 'bibcode': 'ppr2', 'first_author': 'au_ppr2',
          'title': ['ttl_ppr2'], 'reference': ['r2', 'r3'],
          'citation': ['c1', 'c2', 'c3'], 'citation_count': 3},
         {'id': '3', 'bibcode': 'ppr3', 'first_author': 'au_ppr3',
          'title': ['ttl_ppr3'], 'reference': ['r2', 'r3'],
          'citation': ['c2', 'c3'], 'citation_count': 2},
         {'id': '4', 'bibcode': 'foo',
          'keyword_norm': ["aberration", "ablation", "absorption"]},
         {'id': '5', 'bibcode': 'paper_3', 'first_author': 'au_paper3',
          'title': ['ttl_paper_3']},
         {'id': '6', 'bibcode': 'c1', 'first_author': 'au_c1',
          'title': ['ttl_c1']},
         {'id': '7', 'bibcode': 'r2', 'first_author': 'au_r2',
          'title': ['ttl_r2']},
     ]
     solr_body = """{
         "responseHeader":{
         "status":0, "QTime":0,
         "params":{ "fl":"reference,citation", "indent":"true",
         "wt":"json", "q":"*"}},
         "response":{"numFound":10456930,"start":0,"docs":%s
         }}""" % json.dumps(mockdata)
     httpretty.register_uri(
         httpretty.GET, self.app.config.get('RECOMMENDER_SOLR_PATH'),
         content_type='application/json',
         status=200,
         body=solr_body)

     # Recommendations the mock data should produce, in order.
     expected_recommendations = {
         'paper': 'a',
         'recommendations': [
             recommended(bibcode)
             for bibcode in ('ppr2', 'ppr2', 'ppr1', 'c1', 'r2', 'ppr2')
         ],
     }
     # Generate the recommendations and compare.
     self.assertEqual(get_recommendations('a'), expected_recommendations)

     # A publication older than the configured minimum year and a journal
     # outside the list of allowed journals must both yield the same
     # "no recommendations" error payload.
     min_year = self.app.config.get('RECOMMENDER_FROM_YEAR')
     too_old = "%sApJ...999..999X" % (min_year - 1)
     expected = {"Error": "Unable to get results!",
                 "Error Info": "No recommendations available",
                 "Status Code": "200"}
     for bibcode in (too_old, "9999XXXXX.999..999X"):
         recommendations = get_recommendations(bibcode)
         self.assertEqual(recommendations, expected)
 def test_everything(self):
     '''Check that the whole recommendation pipeline works end to end'''
     from recommender import get_recommendations

     def recommended(bibcode):
         # Shape of one recommendation record for the given bibcode.
         return {'bibcode': bibcode,
                 'author': u'au_%s,+' % bibcode,
                 'title': u'ttl_%s' % bibcode}

     # This test needs both the PostgreSQL mock and the overridden Solr
     # query (used by 'get_article_data'). The Solr response must carry
     # the references and citation counts of the papers found in the
     # co-reads; the documents below cover everything tested before.
     mockdata = [
         {'id': '1', 'bibcode': 'ppr1', 'first_author': 'au_ppr1',
          'title': ['ttl_ppr1'], 'reference': ['r1', 'r2'],
          'citation': ['c1'], 'citation_count': 1},
         {'id': '2', 'bibcode': 'ppr2', 'first_author': 'au_ppr2',
          'title': ['ttl_ppr2'], 'reference': ['r2', 'r3'],
          'citation': ['c1', 'c2', 'c3'], 'citation_count': 3},
         {'id': '3', 'bibcode': 'ppr3', 'first_author': 'au_ppr3',
          'title': ['ttl_ppr3'], 'reference': ['r2', 'r3'],
          'citation': ['c2', 'c3'], 'citation_count': 2},
         {'id': '4', 'bibcode': 'foo',
          'keyword_norm': ["aberration", "ablation", "absorption"]},
         {'id': '5', 'bibcode': 'paper_3', 'first_author': 'au_paper3',
          'title': ['ttl_paper_3']},
         {'id': '6', 'bibcode': 'c1', 'first_author': 'au_c1',
          'title': ['ttl_c1']},
         {'id': '7', 'bibcode': 'r2', 'first_author': 'au_r2',
          'title': ['ttl_r2']},
     ]
     solr_body = """{
         "responseHeader":{
         "status":0, "QTime":0,
         "params":{ "fl":"reference,citation", "indent":"true",
         "wt":"json", "q":"*"}},
         "response":{"numFound":10456930,"start":0,"docs":%s
         }}""" % json.dumps(mockdata)
     httpretty.register_uri(
         httpretty.GET, self.app.config.get('RECOMMENDER_SOLR_PATH'),
         content_type='application/json',
         status=200,
         body=solr_body)

     # Recommendations the mock data should produce, in order.
     expected_recommendations = {
         'paper': 'a',
         'recommendations': [
             recommended(bibcode)
             for bibcode in ('ppr2', 'ppr2', 'ppr1', 'c1', 'r2', 'ppr2')
         ],
     }
     # Generate the recommendations and do the final check.
     recommendations = get_recommendations('a')
     self.assertEqual(recommendations, expected_recommendations)