コード例 #1
0
def get_similar_films(db, main_film_id, weights=None):
    """Find the nearest neighbours of a film and their pairwise distances.

    The database is scanned in batches of 100 films. For each batch the
    distance from the main film to every film in the batch is computed, and
    the closest candidates are kept. The 100 overall closest films are then
    turned into ``Film`` instances, and the distances between all of those
    neighbours are computed as well.

    Args:
        db: Database handle forwarded to ``Film`` methods and
            ``build_vector_matrix``.
        main_film_id: ID of the film whose neighbours are wanted.
        weights: Feature weights forwarded to ``calculate_distance_matrix``.
            Defaults to six weights of 1.

    Returns:
        ``None`` if no vectors could be built for the main film. Otherwise a
        tuple ``(main_film, similar_films_instances)``, where the list holds
        at most 100 ``Film`` instances ordered by ascending distance to the
        main film.
    """
    # Build the default per call so the list is never shared between calls.
    if weights is None:
        weights = [1, 1, 1, 1, 1, 1]

    num_films = 100
    batch_size = 100

    main_film = Film(main_film_id)
    vectors = main_film.build_vectors(db)
    if vectors is None:
        return None
    main_film.set_vectors(vectors)
    main_film.set_metadata(db)

    similar_films = []
    offset = 0
    while True:
        vector_matrix = build_vector_matrix(db, batch_size, offset)
        if vector_matrix is None:
            # The helper signals "no more films" with None.
            break

        distance_matrix = calculate_distance_matrix(main_film.vectors, vector_matrix, weights)

        # Pair every distance with the row it was computed from BEFORE
        # dropping the main film. Filtering first and re-joining by index
        # shifts every entry after the main film by one position, so those
        # films would fail the ID check below and be lost.
        candidates = [
            (film_id, distance, row[1:])
            for (film_id, distance), row in zip(distance_matrix, vector_matrix)
            if film_id == row[0] and int(film_id) != int(main_film_id)
        ]
        candidates.sort(key=lambda entry: entry[1])

        # Only the best num_films of a batch can make the overall top list.
        similar_films += candidates[:num_films]

        offset += batch_size

    similar_films.sort(key=lambda entry: entry[1])

    similar_films_instances = []
    for film_id, distance, film_vectors in similar_films[:num_films]:
        similar_film = Film(film_id)
        similar_film.set_distance(main_film_id, distance)
        similar_film.set_metadata(db)
        similar_film.set_vectors(film_vectors)
        similar_films_instances.append(similar_film)

    # Rebuild rows in the (id, *vectors) layout used above so the neighbours
    # can be compared with each other.
    vector_matrix = [(film.id,) + film.vectors for film in similar_films_instances]
    for film in vector_matrix:
        distance_matrix = calculate_distance_matrix(film, vector_matrix, weights)
        for i, (film_id, dist) in enumerate(distance_matrix):
            # Results are expected in the same order as vector_matrix.
            if similar_films_instances[i].id != film_id:
                print("error!")
            similar_films_instances[i].set_distance(film[0], dist)

    return main_film, similar_films_instances
コード例 #2
0
def get_film(db, main_film_id):
    """Load the film with the given ID from the database.

    Returns the ``Film`` instance with its vectors and metadata set, or
    ``None`` when ``build_vectors`` yields no vectors for it.
    """
    film = Film(main_film_id)
    film_vectors = film.build_vectors(db)

    if film_vectors is not None:
        film.set_vectors(film_vectors)
        film.set_metadata(db)
        return film

    return None