def get_similar_films(db, main_film_id, weights=None):
    """Find the films in the database that are closest to a given film.

    The database is scanned in batches of 100 films via
    ``build_vector_matrix``; each batch is scored against the main film with
    ``calculate_distance_matrix`` and the 100 smallest distances overall are
    kept. Every returned neighbour also gets its distance to each of the
    returned neighbours recorded through ``Film.set_distance``.

    @param db: database handle, passed through to the Film methods and to
        ``build_vector_matrix``.
    @param main_film_id: ID of the film to find neighbours for.
    @param weights: feature weights handed to ``calculate_distance_matrix``;
        defaults to six weights of 1.
    @return: ``None`` if no vectors can be built for the main film, otherwise
        a tuple ``(main_film, neighbours)`` where ``neighbours`` is a list of
        at most 100 Film instances ordered by increasing distance.
    """
    if weights is None:
        # Built per call so the default list is never shared between calls.
        weights = [1, 1, 1, 1, 1, 1]

    num_films = 100
    batch_size = 100

    main_film = get_film(db, main_film_id)
    if main_film is None:
        return None

    nearest = []
    offset = 0
    while True:
        vector_matrix = build_vector_matrix(db, batch_size, offset)
        # None marks the end of the data; an empty batch is treated the same
        # way so the scan cannot spin forever.
        if vector_matrix is None or len(vector_matrix) == 0:
            break

        distance_matrix = calculate_distance_matrix(
            main_film.vectors, vector_matrix, weights
        )

        # Pair each distance row with its vector row BEFORE dropping the main
        # film. Filtering first shifts the indices, which made every film
        # after the main film in its batch fail the id check and disappear.
        batch = [
            dist_row + (vec_row[1:],)
            for dist_row, vec_row in zip(distance_matrix, vector_matrix)
            if dist_row[0] == vec_row[0]
            and int(dist_row[0]) != int(main_film_id)
        ]

        # Keep only the best num_films seen so far. The sort is stable and
        # earlier batches come first, so this yields the same final ranking
        # as sorting everything at the end, without holding every row.
        nearest = sorted(nearest + batch, key=lambda row: row[1])[:num_films]
        offset += batch_size

    neighbours = []
    for film_id, distance, vectors in nearest:
        neighbour = Film(film_id)
        neighbour.set_distance(main_film_id, distance)
        neighbour.set_metadata(db)
        neighbour.set_vectors(vectors)
        neighbours.append(neighbour)

    # Distances between the neighbours themselves.
    # NOTE(review): the reference row here carries the film id in position 0,
    # while the scan above passes main_film.vectors - confirm both match what
    # calculate_distance_matrix expects.
    neighbour_matrix = [(film.id,) + film.vectors for film in neighbours]
    for reference_row in neighbour_matrix:
        pairwise = calculate_distance_matrix(
            reference_row, neighbour_matrix, weights
        )
        for neighbour, (film_id, dist) in zip(neighbours, pairwise):
            if neighbour.id != film_id:
                print("error!")
            neighbour.set_distance(reference_row[0], dist)

    return main_film, neighbours
def get_film(db, main_film_id):
    """Load a single film from the database.

    @param db: database handle, passed to the Film methods.
    @param main_film_id: ID of the film to load.
    @return: Film instance with its vectors and metadata set, or ``None``
        when ``build_vectors`` returns ``None`` for this ID.
    """
    film = Film(main_film_id)
    film_vectors = film.build_vectors(db)
    if film_vectors is not None:
        film.set_vectors(film_vectors)
        film.set_metadata(db)
        return film
    # Nothing could be built for this ID.
    return None