Ejemplo n.º 1
0
    def get_correlation_coefficient_from_titles(self, title_a, title_b):
        """Return the correlation coefficient of two movies given by title.

        Each title is resolved with ``loadmovielens.give_me_movie_id`` against
        ``self.items_dictionary``.  When both searches yield exactly one match,
        the first field of each match is forwarded to
        ``self.get_correlation_coefficient_from_ids``.

        Args:
            title_a: Search title of the first movie.
            title_b: Search title of the second movie.

        Returns:
            Whatever ``self.get_correlation_coefficient_from_ids`` returns for
            the two resolved ids.

        Raises:
            SystemExit: With the message "Movie titles not found" when either
                search has no match, or "Ambigous search titles" when either
                search has more than one match.
        """
        movie_a_ids = loadmovielens.give_me_movie_id(title_a, self.items_dictionary)
        movie_b_ids = loadmovielens.give_me_movie_id(title_b, self.items_dictionary)

        # Raise SystemExit directly instead of calling ``exit()``: that helper
        # is injected by the ``site`` module for interactive sessions and is
        # not available when the interpreter runs without it (``python -S``).
        # The exception type and message seen by callers are unchanged.
        if len(movie_a_ids) == 0 or len(movie_b_ids) == 0:
            raise SystemExit("Movie titles not found")
        if len(movie_a_ids) > 1 or len(movie_b_ids) > 1:
            raise SystemExit("Ambigous search titles")

        # Exactly one match per title: take the first field of that match.
        return self.get_correlation_coefficient_from_ids(movie_a_ids[0][0], movie_b_ids[0][0])
Ejemplo n.º 2
0
    """
    test_cases = ast.literal_eval(sys.argv[1])
    results = str(my_info()) + '\t\t'
    for test_case in test_cases:
        mode = test_case[0]
        id_1 = int(test_case[1])
        id_2 = int(test_case[2])
        if mode == 'jc':
            results += str(Jaccard_Coefficient(id_1, id_2)) + '\t\t'
        elif mode == 'cc':
            results += str(Correlation_Coefficient(id_1, id_2)) + '\t\t'
        else:
            exit('bad command')
    print results + '\n'

if __name__ == '__main__':
   main()
   """print my_info()"""
   """print reader.read_movie_lens_data()"""
   print reader.give_me_movie_id('story', movie_dictionary)
   print reader.give_me_movie_id('GoldenEye', movie_dictionary)
   ratings, movie_dictionary, user_ids, item_ids, movie_names = reader.read_movie_lens_data()
   print len(ratings)
   i = 0
   for x in range(0, len(ratings)):"search full db, when find, sum ratings
       i += 1
       """print ratings[i], movie_names[i]"""
   print "i %d" % (i)
   print ratings[1], movie_names[1]

    for movie_id in map_from_movie_to_user_id_list:
        ret.append((movie_id,
                    fast_jaccard_coefficient(map_from_movie_to_user_id_list[movie_id],
                                             map_from_movie_to_user_id_list[target_movie_id])))

    ret = sorted(ret, key=lambda x: -x[1])

    while len(ret) > k + 1:
        ret.pop()

    return ret[1:k + 1]


# Demo driver: prints Jaccard coefficients for two hand-picked movie pairs and
# then asks find_k_closest for 5 results for 'Taxi Driver'.
#
# Every title lookup takes element [0][0] of give_me_movie_id's result, i.e. the
# first field of the first match (presumably the movie id -- TODO confirm
# against reader).  NOTE(review): there is no check for an empty or ambiguous
# result, so a title that is not found would presumably fail on the indexing.
print "--- Jaccard coefficient between 'Toy Story' and 'GoldenEye' ---"
toy_story_id = reader.give_me_movie_id('Toy Story', movie_dictionary)[0][0]
golden_eye_id = reader.give_me_movie_id('GoldenEye', movie_dictionary)[0][0]

print jaccard_coefficient(toy_story_id, golden_eye_id)

print "--- Jaccard coefficient between 'Three Colors: Red' and 'Three Colors: Blue' ---"
red_id = reader.give_me_movie_id('Three Colors: Red', movie_dictionary)[0][0]
blue_id = reader.give_me_movie_id('Three Colors: Blue', movie_dictionary)[0][0]

print jaccard_coefficient(red_id, blue_id)

print "--- Closest 5 movies to 'Taxi Driver'---"
taxi_driver_id = reader.give_me_movie_id('Taxi Driver', movie_dictionary)[0][0]
# The second argument (5) is the requested number of results.
taxi_passengers = find_k_closest(taxi_driver_id, 5)

for t in taxi_passengers:
Ejemplo n.º 4
0
    users1 = set(seen[id].keys())
    for movie in item_ids:
        users2 = set(seen[movie].keys())
        seenBoth = users1 & users2
        if len(seenBoth) < 3:
            continue
        scores = []
        users = []
        for user in seenBoth:
            scores = np.append(scores, seen[id][user])
            scores = np.append(scores, seen[movie][user])
            users = np.append(users, user)
            users = np.append(users, user)

        coef = np.corrcoef(scores, users)[0][1]
        corcoefs[coef] = movie

    order = sorted(corcoefs, reverse=True)
    print movie_dictionary[id], id
    print "-----"
    for i in range(1, 6):
        print movie_dictionary[corcoefs[
            order[i]]], "Correlation coefficient:", round(order[i], 3)


# Driver: resolve two movies from the search strings 'toy' and 'little big'
# (first field of the first match in each case) and run corCoefOne on the
# first of them.
id1 = reader.give_me_movie_id('toy', movie_dictionary)[0][0]
id2 = reader.give_me_movie_id('little big', movie_dictionary)[0][0]

# The pairwise call is disabled; id2 is only referenced by this disabled call.
#corCoef(id1, id2)
corCoefOne(id1)
    for movie_id in map_from_movie_to_user_id_list:
        ret.append((movie_id,
                    fast_jaccard_coefficient(
                        map_from_movie_to_user_id_list[movie_id],
                        map_from_movie_to_user_id_list[target_movie_id])))

    ret = sorted(ret, key=lambda x: -x[1])

    while len(ret) > k + 1:
        ret.pop()

    return ret[1:k + 1]


# Demo driver: prints Jaccard coefficients for two hand-picked movie pairs and
# then asks find_k_closest for 5 results for 'Taxi Driver'.
#
# Every title lookup takes element [0][0] of give_me_movie_id's result, i.e. the
# first field of the first match (presumably the movie id -- TODO confirm
# against reader).  NOTE(review): there is no check for an empty or ambiguous
# result, so a title that is not found would presumably fail on the indexing.
print "--- Jaccard coefficient between 'Toy Story' and 'GoldenEye' ---"
toy_story_id = reader.give_me_movie_id('Toy Story', movie_dictionary)[0][0]
golden_eye_id = reader.give_me_movie_id('GoldenEye', movie_dictionary)[0][0]

print jaccard_coefficient(toy_story_id, golden_eye_id)

print "--- Jaccard coefficient between 'Three Colors: Red' and 'Three Colors: Blue' ---"
red_id = reader.give_me_movie_id('Three Colors: Red', movie_dictionary)[0][0]
blue_id = reader.give_me_movie_id('Three Colors: Blue', movie_dictionary)[0][0]

print jaccard_coefficient(red_id, blue_id)

print "--- Closest 5 movies to 'Taxi Driver'---"
taxi_driver_id = reader.give_me_movie_id('Taxi Driver', movie_dictionary)[0][0]
# The second argument (5) is the requested number of results.
taxi_passengers = find_k_closest(taxi_driver_id, 5)

for t in taxi_passengers:
Ejemplo n.º 6
0
        seenBoth = users1 & users2

        if len(seenBoth) < len(users1) // 5:
            continue

        scores1 = []
        scores2 = []
        for user in seenBoth:
            scores1 = np.append(scores1, seen[id][user])
            scores2 = np.append(scores2, seen[movie][user])

        if np.std(scores1) == 0 or np.std(scores2) == 0:
            continue
        coef = np.corrcoef(scores1, scores2)[0][1]

        corcoefs[coef] = movie
        n[coef] = len(seenBoth)

    order = sorted(corcoefs, reverse=True)
    print movie_dictionary[id], id
    print "-----"
    for i in range(1, 6):
        print movie_dictionary[corcoefs[order[i]]], "Correlation:", round(
            order[i], 3), len(seen[corcoefs[order[i]]])


# Driver: resolve the first match for the search string 'Three colors' (first
# field of the first match) and pass it to jaccardForOne and then corCoefOne.
# NOTE(review): the search string presumably matches several titles; only the
# first match is used -- confirm that this is intended.
id1 = reader.give_me_movie_id('Three colors', movie_dictionary)[0][0]

jaccardForOne(id1)
corCoefOne(id1)