def disambiguate(self, clarification, candidates):
    """Creative Feature: narrow a list of candidate movies using a clarification.

    Given a list of movies that the user could be talking about (represented
    as indices), and a string given by the user as clarification (eg. in
    response to your bot saying "Which movie did you mean: Titanic (1953) or
    Titanic (1997)?"), use the clarification to narrow down the list and
    return a smaller list of candidates (hopefully just 1!)

    - If the clarification uniquely identifies one of the movies, this
      returns a 1-element list with the index of that movie.
    - If it's unclear which movie the user means by the clarification, it
      returns a list with the indices it could be referring to (to continue
      the disambiguation dialogue).

    A candidate is kept when the clarification is a case-insensitive
    substring of either its title or its genre string.

    Example:
      chatbot.disambiguate("1997", [1359, 2716]) should return [1359]

    :param clarification: user input intended to disambiguate between the
        given movies
    :param candidates: a list of movie indices
    :returns: a list of indices corresponding to the movies identified by
        the clarification, in the same order as ``candidates``
    """
    all_movies = movielens.titles()
    # Lowercase once, outside the loop; both sides of every comparison below
    # are lowercased so that e.g. "comedy" matches a "Comedy" genre entry.
    needle = clarification.lower()

    matches = []
    for index in candidates:
        entry = all_movies[index]
        title = entry[0].lower()
        genre = entry[1].lower()
        if needle in title or needle in genre:
            matches.append(index)
    return matches
def helper_find_movies_by_title2(self, text):
    """Resolve ``self.moviename`` to one movie, clarifying by free text.

    First call (``self.helper_find_movies_by_title_begun`` is false):
    looks the name up with ``find_movies_by_title`` and, when that finds
    nothing, with ``find_movies_closest_to_title``.  No match resets the
    dialogue and returns an error message; several matches return a
    "Which movie did you mean" question; exactly one match is selected.

    Follow-up calls: ``text`` is handed to ``disambiguate`` to narrow
    ``self.movieIndexes``, and the same zero / one / many handling applies.

    :param text: the user's latest message (only used on follow-up calls)
    :returns: a prompt string for the user, or ``self.SENTINAL`` once a
        single movie has been selected (``self.movieIndex`` and
        ``self.moviename`` are set and ``self.step_count`` becomes 2)
    """
    movie_titles = movielens.titles()

    def ask_which_movie():
        # Renders the candidates as: "a", "b" or "c"?
        question = "\nI found a lot of movies that matches under " + self.moviename + "\n"
        question += "Which movie did you mean: "
        last = len(self.movieIndexes) - 1
        for position, movie_id in enumerate(self.movieIndexes):
            question += "\"" + str(movie_titles[movie_id][0]) + "\""
            if position == last:
                question += '?'
            elif position == last - 1:
                question += ' or '
            else:
                question += ', '
        return question

    def accept_only_match():
        # Exactly one candidate left: record it and advance the dialogue.
        self.movieIndex = self.movieIndexes[0]
        self.moviename = movie_titles[self.movieIndex][0]
        self.step_count = 2
        return self.SENTINAL

    if not self.helper_find_movies_by_title_begun:
        self.helper_find_movies_by_title_begun = True

        self.movieIndexes = self.find_movies_by_title(self.moviename)
        if not self.movieIndexes:
            # Nothing matched literally; try a spell-corrected lookup.
            self.movieIndexes = self.find_movies_closest_to_title(
                self.moviename)
        if not self.movieIndexes:
            self.reset_to_beginning("")
            return "We could not find a movie with that title. Please try again.\n"

        if len(self.movieIndexes) > 1:
            return ask_which_movie()
        if len(self.movieIndexes) == 1:
            return accept_only_match()

    # Follow-up turn: the user's text is a clarification.
    self.movieIndexes = self.disambiguate(text, self.movieIndexes)
    remaining = len(self.movieIndexes)

    if remaining == 0:
        self.reset_to_beginning("")
        return "What are you saying??? Let's just start over please."
    if remaining == 1:
        return accept_only_match()
    if remaining > 1:
        return ask_which_movie()
    return self.SENTINAL
def helper_recommend(self, text):
    """Run the recommendation step of the dialogue.

    First call (``self.helper_recommend_begun`` is false):
      - If the user has rated more than ``self.MOVIELIMIT`` movies, build
        a rating vector from ``self.userMovieMap``, binarize it together
        with ``self.ratings``, store the result of ``self.recommend`` in
        ``self.listOfReccomendations`` and return a prompt naming the
        first (up to) three recommended titles.
      - Otherwise reset the dialogue and ask for another movie.

    Follow-up call: if ``text`` looks like a "yes" (contains "ye" or "ya",
    case-insensitively) list the remaining recommendations; in either case
    thank the user, reset the dialogue and clear the stored ratings,
    potential movies and recommendations.

    :param text: the user's latest message (only used on the follow-up call)
    :returns: the prompt string to show the user
    """
    all_movies = movielens.titles()

    if not self.helper_recommend_begun:
        self.helper_recommend_begun = True

        if len(self.userMovieMap) <= self.MOVIELIMIT:
            # Not enough ratings yet: keep collecting opinions.
            self.reset_to_beginning("")
            return self.sentiment_message + "Can you tell me about another movie? \n"

        # One slot per movie in the catalogue; unrated movies stay 0.
        self.userMovieRatings = np.zeros(len(all_movies))
        for movie_index, rating in self.userMovieMap.items():
            self.userMovieRatings[movie_index] = rating

        binUserRatings = self.binarize(self.userMovieRatings)
        binRatings = self.binarize(self.ratings)
        self.listOfReccomendations = self.recommend(
            binUserRatings, binRatings)

        # Slice instead of indexing [0], [1], [2] so that fewer than three
        # recommendations do not raise IndexError.
        top_titles = [
            str(all_movies[movie_index][0])
            for movie_index in self.listOfReccomendations[:3]
        ]
        return (
            "Given what you told me, I think you would like the following movies: "
            + " ".join(top_titles)
            + ". Would you like to hear more recommendations?"
        )

    prompt = ''
    answer = text.lower()  # accept "Yes" / "YEAH" as well as "yes"
    if 'ye' in answer or 'ya' in answer:
        prompt += "\nThe rest of the movies are: "
        # Look titles up by the *recommended movie id*, not by the position
        # in the recommendation list.
        prompt += "".join(
            str(all_movies[movie_index][0]) + "\n"
            for movie_index in self.listOfReccomendations[3:]
        )
    prompt += "Thank you so much for trying me out and I hope that you continue using me :) For now, I'll forget every movie you used to like and restart over \n"
    prompt += "Please tell me your thoughts on some movies, so I can recommend some new movies to you :) \n"

    # Forget everything learned about this user and start from scratch.
    self.reset_to_beginning("")
    self.userMovieMap.clear()
    self.listOfPotentialMovies.clear()
    self.listOfReccomendations.clear()
    return prompt
def helper_find_movies_by_title(self, text):
    """Resolve ``self.moviename`` to one movie, clarifying by menu number.

    First call (``self.helper_find_movies_by_title_begun`` is false):
    looks the name up with ``find_movies_by_title``, retrying with the
    title-cased name when nothing is found.  No match resets the dialogue
    and returns an error message; several matches return a numbered menu;
    exactly one match is selected immediately.

    Follow-up call: ``text`` is parsed as the menu number.  A non-integer
    or out-of-range answer resets the dialogue and returns a message.

    :param text: the user's latest message (only used on the follow-up call)
    :returns: a prompt string for the user, or ``self.SENTINAL`` once a
        movie has been selected (``self.movieIndex`` and ``self.moviename``
        are set and ``self.step_count`` becomes 2)
    """
    movie_titles = movielens.titles()

    def lock_in(movie_id):
        # Record the chosen movie and move the dialogue on to step 2.
        self.movieIndex = movie_id
        self.moviename = movie_titles[self.movieIndex][0]
        self.step_count = 2
        return self.SENTINAL

    if not self.helper_find_movies_by_title_begun:
        self.helper_find_movies_by_title_begun = True

        self.movieIndexes = self.find_movies_by_title(self.moviename)
        if not self.movieIndexes:
            # Second attempt with Title Case.
            self.movieIndexes = self.find_movies_by_title(
                self.moviename.title())
        if not self.movieIndexes:
            self.reset_to_beginning("")
            return "We could not find a movie with that title. Please try again.\n"

        if len(self.movieIndexes) > 1:
            menu = "\nI found a lot of movies that matches under " + self.moviename + "\n"
            menu += "Can you please enter the number that best matches the movie you wanted? or say anything else? \n"
            for option_number, movie_id in enumerate(self.movieIndexes):
                menu += "(" + str(option_number) + ") : " + str(
                    movie_titles[movie_id][0]) + "\n"
            return menu
        if len(self.movieIndexes) == 1:
            return lock_in(self.movieIndexes[0])

    # Follow-up turn: the answer should be one of the menu numbers.
    try:
        choice = int(text)
    except ValueError:
        self.reset_to_beginning("")
        return "I don't understand that... Let's start again?"

    highest_option = len(self.movieIndexes) - 1
    if choice < 0 or choice > highest_option:
        self.reset_to_beginning("")
        return "I understand that you don't want none of those...Let's start again."

    return lock_in(self.movieIndexes[choice])
def find_movies_by_title(self, title):
    """Return the indices of all movies whose title contains ``title``.

    The query is first passed through ``self.process_title``; a movie
    matches when the processed query is a substring of the movie's
    lowercased title.

    - No match: returns an empty list.
    - One or more matches: returns every matching index, in catalogue
      order.

    Example:
      ids = chatbot.find_movies_by_title('Titanic')
      print(ids) // prints [1359, 1953]

    :param title: a string containing a movie title
    :returns: a list of indices of matching movies
    """
    needle = self.process_title(title)
    return [
        movie_id
        for movie_id, entry in enumerate(movielens.titles())
        if needle in entry[0].lower()
    ]
def find_movies_closest_to_title(self, title, max_distance=3):
    """Creative Feature: find the movies closest in edit distance to a title.

    Given a potentially misspelled movie title, return a list of the movies
    in the dataset whose titles have the least edit distance from the
    provided title, and with edit distance at most max_distance.

    - If no movies have titles within max_distance of the provided title,
      return an empty list.
    - Otherwise, if there's a movie closer in edit distance to the given
      title than all other movies, return a 1-element list containing its
      index.
    - If there is a tie for closest movie, return a list with the indices
      of all movies tying for minimum edit distance to the given movie.

    Each movie is compared twice: once with its full processed title and
    once with the year removed and a trailing ", the" moved to the front.
    The smaller usable distance of the two is the movie's distance.

    Example:
      chatbot.find_movies_closest_to_title("Sleeping Beaty")
      # should return [1656]

    :param title: a potentially misspelled title
    :param max_distance: the maximum edit distance to search for
    :returns: a list of movie indices with titles closest to the given
        title and within edit distance max_distance
    """
    # Catalogue titles are lowercased below, so lowercase the query too.
    title = self.process_title(title).lower()

    year_suffix = re.compile(r"\s\((\d{4})\)")
    trailing_the = re.compile(r", the\Z")

    # distance -> indices of the movies whose best distance is that value
    movies_by_distance = {}
    for i, entry in enumerate(movielens.titles()):
        movie = self.process_title(entry[0]).lower()
        distance_full = self.edit_distance(movie, title, max_distance)

        movie = year_suffix.sub("", movie)  # remove date
        if trailing_the.search(movie) is not None:
            # switch "the" to the beginning of the title
            movie = "the " + trailing_the.sub("", movie)
        distance_no_year = self.edit_distance(movie, title, max_distance)

        # -1 is treated as "no usable distance", as is anything beyond
        # max_distance.
        usable = [
            d for d in (distance_full, distance_no_year)
            if d != -1 and d <= max_distance
        ]
        if usable:
            # File the movie under its *best* distance so the minimum
            # bucket always contains every movie that achieves it.
            movies_by_distance.setdefault(min(usable), []).append(i)

    if not movies_by_distance:
        # Nothing within max_distance.
        return []
    return list(movies_by_distance[min(movies_by_distance)])
def find_movies_helper(self, text):
    """Collect movie titles mentioned in ``text`` and matching movie indices.

    Titles written between double quotes are always extracted.  When
    ``self.creative`` is truthy and at least one quoted title was found,
    the lowercased, punctuation-stripped text is additionally scanned
    against every title from ``movielens.titles()``; each hit appends the
    matched title string to ``titles`` and the movie's index to
    ``id_list``.  Four checks are tried per movie, in this order:
    alternate (parenthesised) title, title with year, the original title
    after a parenthesis-stripping regex, and the bare title as a whole
    word/phrase.

    NOTE(review): the block nesting here was reconstructed from
    whitespace-collapsed source; confirm in particular that the final
    de-duplication of ``titles`` belongs inside the creative branch.

    :param text: the raw user message
    :returns: a two-element list ``[id_list, titles]``; ``titles`` is
        de-duplicated through a set in the creative branch, so its order
        and length need not line up with ``id_list``
    """
    id_list = []
    titles = []
    # Everything between a pair of double quotes is taken as a title.
    titles = titles + re.findall('"([^"]*)"', text)
    if self.creative and titles != []:
        # strip text of case and punctuation
        text = text.lower()
        text = re.sub(r'[,\'!?:]', '', text)
        alt_title_dict = {}  # never read in this function
        movie_list = movielens.titles()
        for j in range(len(movie_list)):
            movie = movie_list[j]  # never read in this function
            movie_stripped = ""
            # Becomes True once this movie has been recorded, so the later
            # checks do not record it again.
            matched = False
            # strip movie of case and year
            original_movie = movie_list[j][0].lower()  # make lowercase
            original_movie = self.process_title_reverse(original_movie)
            # " (YYYY)" suffixes found in the title (list, possibly empty)
            date = re.findall(' \(\d{4}\)', original_movie)
            # turn Notebook, The into The Notebook
            movie_stripped = self.process_title_reverse(
                re.sub(' \(\d{4}\)', '', original_movie))
            # Drop the punctuation that was also dropped from the text.
            movie_stripped = re.sub(r'[.,\':]', '', movie_stripped)
            movie_with_date = movie_stripped  # The Notebook (2007)
            if len(date) > 0:
                movie_with_date = movie_stripped + date[0]
            alt_titles = re.findall(
                ' \(.[^\)\(]*\)',
                movie_stripped)  # find foreign titles in parenthesis
            if len(alt_titles) > 0:
                for i in range(len(alt_titles)):
                    # Remove the parentheses and a leading "aka ".
                    alt_title = re.sub('[\(\)]', '', alt_titles[i])
                    alt_title = self.process_title_reverse(
                        re.sub('aka ', '', alt_title).lstrip())
                    # Match when the alternate title is one of the words of
                    # the text, or when all of its words occur in the text
                    # (in any order); the alternate title "not" is skipped.
                    if (alt_title in text.split() or set(
                            alt_title.split()).issubset(text.split())
                       ) and alt_title.lower() != 'not':
                        titles.append(movie_stripped)
                        matched = True
                        # Record the index of the movie being asked about.
                        id_list.append(j)
            movie_with_parens = movie_stripped  # never read in this function
            movie_stripped = re.sub(
                ' \(.*\)', '', movie_stripped)  # remove any extra parenthesis
            # if they entered it in with the date, we want to return the date
            if movie_with_date in text and not matched:
                titles.append(movie_with_date)
                id_list.append(j)
                matched = True
            # NOTE(review): inside a character class "{4}" is literal, so
            # [^\d{4}] excludes digits, "{", "4" and "}" rather than
            # matching a 4-digit year; presumably meant to drop
            # non-year parentheticals while keeping the date - verify.
            movie_with_date_no_parens = re.sub('\(.[^\d{4}]*.\)', '',
                                               original_movie)
            if movie_with_date_no_parens in text and not matched:
                titles.append(movie_with_date_no_parens)
                id_list.append(j)
                matched = True
            # handles case of one movie 'toy story' "i like toy story":
            # the bare title must appear delimited by word boundaries
            if re.search(r"\b" + re.escape(movie_stripped) + r"\b",
                         text) and not matched:
                titles.append(movie_stripped)
                id_list.append(j)
        # Remove duplicate title strings (set order is arbitrary).
        titles = list(set(titles))
    return [id_list, titles]