Exemple #1
0
 def test_similarity_matrix_basic(self):
     """Check the similarity matrix of two users with one item each.

     Users with the same item are expected to score 1.0 against each
     other, users with different items 0.0; in both cases every user
     scores 1.0 against itself.
     """
     cases = (
         # (items per user, expected score between user 1 and user 2)
         ({1: {'a'}, 2: {'a'}}, 1.0),
         ({1: {'a'}, 2: {'b'}}, 0.0),
     )
     for liked, cross_score in cases:
         expected = {
             1: {1: 1.0, 2: cross_score},
             2: {1: cross_score, 2: 1.0},
         }
         self.assertDictEqual(similarity_matrix(liked), expected)
Exemple #2
0
 def test_asymmetric_similarity_returns_superset_of_jaccard(self):
     """Recommendations from the default similarity measure must be a
     subset of those obtained with ``asymmetric_similarity`` (cutoff 0.25).
     """
     liked = read_file('testdata.csv')
     default_matrix = similarity_matrix(liked)
     asymmetric_matrix = similarity_matrix(liked, asymmetric_similarity)
     for user in liked:
         narrow = recommendations(user, liked, default_matrix, 0.25)
         wide = recommendations(user, liked, asymmetric_matrix, 0.25)
         self.assertTrue(narrow.issubset(wide))
Exemple #3
0
    def test_similar_users(self):
        """Exercise ``similar_users`` on two tiny inputs and on testdata.csv."""
        # Both users have the same single item: each one is expected to be
        # the other's only match, including at a cutoff of 1.0.
        same_items = similarity_matrix({1: {'a'}, 2: {'a'}})
        for user, cutoff, expected in ((1, 0.2, [2]),
                                       (2, 0.2, [1]),
                                       (1, 1.0, [2])):
            self.assertEqual(similar_users(user, same_items, cutoff), expected)

        # Different items: no similar user is expected.
        different_items = similarity_matrix({1: {'a'}, 2: {'b'}})
        self.assertEqual(similar_users(1, different_items, 0.2), [])

        from_file = similarity_matrix(read_file('testdata.csv'))
        self.assertEqual(similar_users(1, from_file, 0.2), [3])
        self.assertEqual(similar_users(2, from_file, 0.15), [1, 4])
Exemple #4
0
 def test_minhash_with_testdata(self):
     """Check recommendations built on ``minhash_similarity`` for
     testdata.csv at a strict (0.75) and a loose (0.15) cutoff."""
     liked = read_file('testdata.csv')
     matrix = similarity_matrix(liked, minhash_similarity)

     strict_for_1 = recommendations(1, liked, matrix, 0.75)
     self.assertEqual(strict_for_1, {42})

     # User 3 must get an empty (falsy) result at the strict cutoff.
     strict_for_3 = recommendations(3, liked, matrix, 0.75)
     self.assertFalse(strict_for_3)

     # At the loose cutoff user 1 is expected to receive everything users
     # 2 and 3 have, minus what user 1 already has.
     loose_for_1 = recommendations(1, liked, matrix, 0.15)
     self.assertEqual(loose_for_1, (liked[2] | liked[3]) - liked[1])
Exemple #5
0
 def test_recommendations_with_zero_cutoff_returns_all_other_products(self):
     """With a cutoff of 0 every user must be recommended each product
     present in testdata.csv that the user does not already have."""
     sets = read_file('testdata.csv')
     similarity = similarity_matrix(sets)
     # The union of all products is the same for every user, so build it
     # once up front instead of re-folding every set on each iteration.
     all_products = set().union(*sets.values())
     for user, own_products in sets.items():
         self.assertEqual(
             recommendations(user, sets, similarity, 0),
             all_products - own_products)
Exemple #6
0
 def test_asymmetric_similarity(self):
     """Check ``asymmetric_similarity`` directly and through the matrix.

     The expected score depends on argument order: ({'a'}, {'a', 'b'})
     gives 1, the swapped call gives 0.5.
     """
     direct_cases = (
         ({'a'}, {'a', 'b'}, 1),
         ({'a', 'b'}, {'a'}, 0.5),
     )
     for first, second, score in direct_cases:
         self.assertEqual(asymmetric_similarity(first, second), score)

     liked = {1: {'a'}, 2: {'a', 'b'}}
     matrix = similarity_matrix(liked, asymmetric_similarity)
     expected = {
         1: {1: 1.0, 2: 1.0},
         2: {1: 0.5, 2: 1.0},
     }
     self.assertDictEqual(matrix, expected)
Exemple #7
0
 def test_similarity_matrix_with_testdata(self):
     """Compare the default similarity matrix of testdata.csv with
     hard-coded expected scores for its five users."""
     actual = similarity_matrix(read_file('testdata.csv'))

     # One row per user; position k-1 of a row is the score against user k.
     rows = {
         1: (1.0, 0.16666666666666666, 0.75, 0.0, 0.0),
         2: (0.16666666666666666, 1.0, 0.14285714285714285, 0.4, 0.0),
         3: (0.75, 0.14285714285714285, 1.0, 0.0, 0.0),
         4: (0.0, 0.4, 0.0, 1.0, 0.0),
         5: (0.0, 0.0, 0.0, 0.0, 1.0),
     }
     expected = {
         user: dict(enumerate(scores, start=1))
         for user, scores in rows.items()
     }
     self.assertDictEqual(actual, expected)
Exemple #8
0
        '--user',
        default='1',
        type=str,
        help='IDs (comma-separated) of the user(s) to receive recommendations')
    parser.add_argument('--set-comparison',
                        default='jaccard_coefficient',
                        help='Method used to compare two sets [{}]'.format(
                            '|'.join(SIMILARITY_FUNCTIONS.keys())))
    parser.add_argument(
        '--cutoff',
        default=0.25,
        type=float,
        help=
        'Value for similarity of liked products above which two users are considered similar'
    )

    return parser.parse_args()


# Script body: read the command-line options, build the similarity matrix
# for the requested users and print one line of recommendations per user.
args = parse()

# Look up the comparison function by the name given via --set-comparison.
set_comparison = SIMILARITY_FUNCTIONS[args.set_comparison]
liked_items = read_file(args.filename)
# --user is a comma-separated string; each entry is converted with int().
users: List[Union[int, str]] = list(map(int, args.user.split(',')))

similarity = similarity_matrix(liked_items, set_comparison, users)

for user in users:
    recommended = recommendations(user, liked_items, similarity, args.cutoff)
    print('User: {:>4d} Recommendations: {}'.format(user, recommended))
Exemple #9
0
 def test_similarity_matrix_elements_equal_to_themselves(self):
     """For 100 users with one distinct item each, the matrix must have
     one entry per user and every user must score 1.0 against itself."""
     users = {n: {n} for n in range(100)}
     matrix = similarity_matrix(users)
     self.assertEqual(len(matrix), len(users))
     # The keys are exactly 0..99, so iterating the dict visits the same
     # indices as counting up to its length.
     for user in users:
         self.assertEqual(matrix[user][user], 1.0)
Exemple #10
0
 def test_recommendations(self):
     """At cutoff 0.4, user 1 is expected to be recommended 'b' (which
     only user 2 has), while user 2 gets an empty set."""
     liked = {1: {'a'}, 2: {'a', 'b'}}
     matrix = similarity_matrix(liked)
     for user, expected in ((1, {'b'}), (2, set())):
         self.assertEqual(recommendations(user, liked, matrix, 0.4), expected)