def choose_medium_length_string(l, lmin=5, lmax=18):
    """Pick a random string from ``l`` whose word count lies within bounds.

    Words are counted by splitting each string on whitespace.

    Args:
        l: iterable of strings to choose from.
        lmin: minimum number of words, inclusive.
        lmax: maximum number of words, inclusive.

    Returns:
        One qualifying string picked by ``ch``, or ``None`` when no string
        in ``l`` has between ``lmin`` and ``lmax`` words.
    """
    # Materialise a real list: on Python 3 ``filter`` returns a lazy
    # iterator, which ``len()`` rejects with a TypeError.
    good_strings = [x for x in l if lmin <= len(x.split()) <= lmax]
    if not good_strings:
        return None
    return ch(good_strings)
def begin_augmentation(conversation):
    """Try augmentation keys at random until the conversation changes length.

    The movie title is recovered from the first conversation entry by
    stripping the fixed opening phrase, then looked up in ``movie_list`` to
    get the movie id. Keys are drawn without replacement; after each draw
    ``check_if_exists`` is called and its result replaces ``conversation``.
    If every key has been tried and the length is still unchanged, the
    module-level ``count`` is incremented.

    Args:
        conversation: sequence whose first element contains the opening
            line naming the movie.

    Returns:
        The conversation as last returned by ``check_if_exists`` (or the
        input itself if no key was ever tried).
    """
    global count

    untried_keys = [
        'scene',
        'character',
        'opinion',
        'critical_review',
        'movie_trivia',
    ]
    starting_length = len(conversation)
    title = re.sub("<b>You:</b> Umm, let's talk about ", '', conversation[0])
    movie_id = movie_list[title]

    while True:
        if len(conversation) != starting_length:
            # An augmentation was applied; nothing more to do.
            break
        if not untried_keys:
            # Every key was tried without success: record the miss.
            count = count + 1
            break
        picked = ch(untried_keys)
        conversation = check_if_exists(movie_id, conversation, picked)
        untried_keys.remove(picked)

    return conversation
def _pick_movie_item(candidates, movie_id, extra_key):
    """Pick one item from ``candidates``, falling back to ``extra_data``.

    Returns ``(item, index)`` where ``index`` is the randomly drawn position
    in ``candidates``, or ``-1`` when the item came from
    ``extra_data[movie_id][extra_key]``. Returns ``None`` when neither
    source has anything for this movie.
    """
    if candidates:
        idx = ch(np.arange(len(candidates)))
        return candidates[idx], idx
    if movie_id in extra_data:
        return extra_data[movie_id][extra_key], -1
    return None


def check_if_exists(movie_id, key):
    """Build a conversation for ``movie_id`` from the data named by ``key``.

    Supported keys and the legend code returned for each:

        'scene'           -> 2
        'character'       -> 4
        'critical_review' -> 8
        'opinion'         -> 3
        'question'        -> 7
        'remember'        -> 5

    'scene', 'character', 'opinion' and 'remember' read the matching
    attribute of ``movies[movie_id]`` and fall back to
    ``extra_data[movie_id]`` when that attribute is empty. 'question' reads
    ``questions[movie_id]`` only. 'critical_review' picks a medium-length
    review via ``choose_medium_length_string``. 'movie_trivia' is not
    handled here.

    Args:
        movie_id: key into ``movies`` (and ``extra_data`` / ``questions``).
        key: one of the strings listed above; any other value yields the
            "nothing found" result.

    Returns:
        A tuple ``(conversation, i, l)``. ``conversation`` is whatever
        ``statement_processing`` returned, or ``None`` if no data was
        available. ``i`` is the index of the chosen item in its source list
        (``-1`` when nothing was chosen or the item came from
        ``extra_data``). ``l`` is the legend code (``-1`` when nothing was
        chosen).
    """
    conversation = None
    i = -1
    l = -1

    if key == 'scene':
        picked = _pick_movie_item(
            movies[movie_id].fav_scene, movie_id, 'fav_scene')
        if picked is not None:
            chosen, i = picked
            conversation = statement_processing(
                ch(fav_scene_open_statements), chosen)
            l = 2

    elif key == 'character':
        picked = _pick_movie_item(
            movies[movie_id].fav_character, movie_id, 'fav_character')
        if picked is not None:
            chosen, i = picked
            conversation = statement_processing(
                ch(fav_character_open_statements), chosen)
            l = 4

    elif key == 'critical_review':
        critical_reviews = movies[movie_id].critical_reviews
        selected_review = choose_medium_length_string(critical_reviews)
        if selected_review is not None:
            # Default to the review as-is so the name is always bound; the
            # original only assigned it for lowercase-initial reviews and
            # raised UnboundLocalError for every other review.
            processed_review = selected_review
            if selected_review[0].islower():
                # A lowercase start reads like a sentence fragment, so give
                # it a subject.
                processed_review = "This movie is " + selected_review
            conversation = statement_processing(
                ch(review_intro_statements), processed_review)
            i = critical_reviews.index(selected_review)
            l = 8

    elif key == 'opinion':
        picked = _pick_movie_item(
            movies[movie_id].opinion, movie_id, 'opinion')
        if picked is not None:
            chosen, i = picked
            conversation = statement_processing(
                ch(opinion_open_statements), chosen)
            l = 3

    elif key == 'question':
        # Questions have no ``extra_data`` fallback.
        if movie_id in questions and questions[movie_id]:
            i = ch(np.arange(len(questions[movie_id])))
            chosen = questions[movie_id][i]
            conversation = statement_processing(chosen)
            l = 7

    elif key == 'remember':
        picked = _pick_movie_item(
            movies[movie_id].do_you_remember, movie_id, 'do_you_remember')
        if picked is not None:
            chosen, i = picked
            conversation = statement_processing(chosen)
            l = 5

    return conversation, i, l
def main():
    """Build one batch of self-chat openers and write it to a CSV file.

    Reads the movie table from ``fname_input``, keeps a random sample of
    ``int(sys.argv[2])`` rows, and for each sampled movie tries the keys
    'scene', 'character' and 'opinion' in random order until
    ``check_if_exists`` returns a conversation. Movies for which every key
    fails are collected and printed, and are not written to the output.

    The result is written to ``'self_chat_batch_<ind>.csv'`` where ``ind``
    is a module-level name.
    """
    data = pd.read_csv(fname_input)  # change file_name here

    # Shuffle the row positions and keep the requested number of movies.
    x = np.arange(len(data))
    np.random.shuffle(x)
    x = x[0:int(sys.argv[2])]

    movie_name = np.array(data['title'])[x]
    imdb_id = np.array(data['imdb_id'])[x]

    s_movie_name = []
    s_wiki = []
    s_imdb_id = []
    s_chat = []
    did_not = []
    legend = []
    index_ = []
    m_plot = []
    m_review = []
    m_comment = []
    s_rev_legend = []

    for m, id_ in zip(movie_name, imdb_id):
        conversation, i, l = None, -1, -1
        keys = ['scene', 'character', 'opinion']

        # Draw keys without replacement until one yields a conversation.
        while conversation is None and keys:
            key = ch(keys)
            conversation, i, l = check_if_exists(id_, key)
            keys.remove(key)

        if conversation is None:
            did_not.append(id_)
            continue

        s_wiki.append(
            'https://en.wikipedia.org/?curid=' + str(movies[id_].wiki_id))
        s_chat.append(conversation)
        s_imdb_id.append(id_)
        s_movie_name.append(m)
        index_.append(i)
        legend.append(l)
        m_plot.append(movies[id_].plot)

        # Comments are the concatenation of whichever sources exist.
        temp = ""
        if id_ in reddit_singles:
            temp = temp + change_to_html(reddit_singles[id_])
        if id_ in review_titles:
            temp = temp + change_to_html(review_titles[id_])
        m_comment.append(temp)

        # The review legend records which review index was used
        # (-1 when the movie has no reviews).
        if id_ in reviews:
            m_review.append(convert_to_ascii(reviews[id_][1]))
            s_rev_legend.append(1)
        else:
            m_review.append("")
            s_rev_legend.append(-1)

    print(did_not)

    m_plot, s_plot_legend = plot_array(m_plot)
    print(len(s_wiki))

    d = {
        'comment_1': m_comment,
        'plot_1': m_plot,
        'review_1': m_review,
        'chat_1': s_chat,
        'wiki_1': s_wiki,
        'imdb_id_1': s_imdb_id,
        'movie_name_1': s_movie_name,
        'legend_1': legend,
        'used_index_1': index_,
        'review_legend_1': s_rev_legend,
        'plot_legend_1': s_plot_legend,
    }
    df = pd.DataFrame(d)
    df.to_csv('self_chat_batch_' + str(ind) + '.csv',
              index=False, encoding='utf-8')
from numpy.random import choice as ch
import sys
# NOTE(review): the string below describes a cube-rotation string cipher,
# which looks unrelated to the movie-conversation functions around it --
# presumably left over from another script; confirm before relying on it.
# Its wording (including the typos "thes" and "independantly") is left
# untouched because it is a string literal, not a comment.
""" This code encrypts a string into another string, following thes steps: - first the strings is sliced to 8 vertices cubic strings (as many as needed). The letters are successively placed at vertices following the coordinates: (0,0,0)->(0,1,0)->(1,1,0)->(1,0,0)->(0,0,1)->(0,1,1)->(1,1,1)->(1,0,1) - then the cubes are randomly rotated, a random number of times, independantly, up, down, left or right. - the resulting cube positions are being read following the same path, producing the encrypted string. - the code also embeds the reverse - decrypting - function. """
# Presumably sample plaintext, sample ciphertext and an (empty) key for the
# cipher described above -- TODO confirm. None of the definitions visible in
# this part of the file read these module-level names.
text = "I aml"
encrypted = "t I oamoie tllingt enoti ed"
key = ""
# INPUT instructions:
def opinion_augmentation(movie_id, conversation, breakpoint):
    """Insert an opinion exchange into ``conversation`` at ``breakpoint``.

    Two turns are built: a randomly chosen opinion opener prefixed for
    position ``breakpoint``, and a randomly chosen opinion of the movie
    prefixed for position ``breakpoint + 1``. Both are handed to
    ``augment_the_conversation``, whose result is returned.
    """
    opener_prefix = add_pre_template(breakpoint)
    opener = opener_prefix + ch(opinion_open_statements)

    reply_prefix = add_pre_template(breakpoint + 1)
    reply = reply_prefix + ch(movies[movie_id].opinion)

    new_turns = [opener, reply]
    return augment_the_conversation(conversation, breakpoint, new_turns)
def movie_review_augmentation(movie_id, conversation, breakpoint,
                              selected_review, s=" "):
    """Insert a review exchange into ``conversation`` at ``breakpoint``.

    The first turn is a random review intro followed by ``selected_review``;
    the second is a random acceptance reply. ``movie_id`` is not used, and
    the incoming value of ``s`` is never read (the name is rebound below).
    """
    review_prefix = add_pre_template(breakpoint)
    s = review_prefix + ch(review_intro_statements) + selected_review

    answer_prefix = add_pre_template(breakpoint + 1)
    answer = answer_prefix + ch(mid_review_accept)

    new_turns = [s, answer]
    return augment_the_conversation(conversation, breakpoint, new_turns)
def movie_trivia_augmentation(movie_id, conversation, breakpoint,
                              trivia_chosen, s=" "):
    """Insert a trivia exchange into ``conversation`` at ``breakpoint``.

    The first turn is a random trivia opener followed by ``trivia_chosen``;
    the second is a random acceptance reply. ``movie_id`` is not used, and
    the incoming value of ``s`` is never read (the name is rebound below).
    """
    trivia_prefix = add_pre_template(breakpoint)
    s = trivia_prefix + ch(mid_trivia_open_statements) + trivia_chosen

    answer_prefix = add_pre_template(breakpoint + 1)
    answer = answer_prefix + ch(mid_trivia_accept)

    new_turns = [s, answer]
    # change to s after first batch
    return augment_the_conversation(conversation, breakpoint, new_turns)
def fav_character_augmentation(movie_id, conversation, breakpoint):
    """Insert a favourite-character exchange at ``breakpoint``.

    The reply (a random favourite character of the movie, prefixed for
    position ``breakpoint + 1``) is built before the opener, but the opener
    is placed first in the list passed to ``augment_the_conversation``.
    """
    # Build the reply first, then the opener, keeping the order of calls.
    reply_prefix = add_pre_template(breakpoint + 1)
    character_reply = reply_prefix + ch(movies[movie_id].fav_character)

    opener_prefix = add_pre_template(breakpoint)
    opener = opener_prefix + ch(fav_character_open_statements)

    new_turns = [opener, character_reply]
    return augment_the_conversation(conversation, breakpoint, new_turns)
def ee_stat_gen_age():
    """Draw a random age from the ``EE_stat_age_dist`` brackets.

    A bracket index is drawn with the probabilities in
    ``EE_stat_age_prob``; an integer is then drawn uniformly from that
    bracket's first element up to and including its second element.
    """
    bracket_indices = np.arange(len(EE_stat_age_dist))
    ind = ch(bracket_indices, p=EE_stat_age_prob)

    lowest = EE_stat_age_dist[ind][0]
    highest = EE_stat_age_dist[ind][1]
    # +1 so the upper bound of the bracket is itself a possible outcome.
    ages = np.arange(lowest, highest + 1)
    return ch(ages)
def main(input_csv='/home/nikita/Downloads/50 movies - Sheet2.csv',
         output_csv='augmented_start_batch_50_2_1.csv'):
    """Build conversation openers for every movie in a CSV and save them.

    For each row of ``input_csv`` (columns 'title', 'wiki', 'imdb_id') the
    keys 'scene', 'character', 'opinion', 'critical_review', 'question' and
    'remember' are tried in random order until ``check_if_exists`` returns
    a conversation. Movies for which every key fails are retried once with
    'scene' and appended after all successful movies, so every input row
    produces an output row.

    The rows are written to ``output_csv`` as two column groups (``*_1``
    for the first 25 rows, ``*_2`` for the rest). ``pandas.DataFrame``
    requires equal-length columns, so this layout only works when exactly
    50 rows were produced.

    Args:
        input_csv: path of the movie table to read.
        output_csv: path of the CSV file to write.
    """
    data = pd.read_csv(input_csv)

    all_keys = [
        'scene', 'character', 'opinion', 'critical_review', 'question',
        'remember'
    ]

    s_movie_name = []
    s_wiki = []
    s_imdb_id = []
    s_chat = []
    legend = []
    index_ = []
    m_plot = []
    m_review = []
    m_comment = []

    def append_row(title, wiki_url, movie_id, conversation, used_index,
                   legend_code):
        """Append one movie's output values to all the column lists."""
        s_wiki.append(wiki_url)
        s_chat.append(conversation)
        s_imdb_id.append(movie_id)
        s_movie_name.append(title)
        index_.append(used_index)
        legend.append(legend_code)
        m_plot.append(movies[movie_id].plot)

        # Comments are the concatenation of whichever sources exist.
        temp = ""
        if movie_id in reddit_singles:
            temp = temp + change_to_html(reddit_singles[movie_id])
        if movie_id in review_titles:
            temp = temp + change_to_html(review_titles[movie_id])
        m_comment.append(temp)

        if movie_id in reviews:
            m_review.append(convert_to_ascii(reviews[movie_id][0]))
        else:
            m_review.append("")

    did_not = []       # ids only, kept for the printed report
    did_not_rows = []  # (title, wiki, id) so fallback rows use their own data

    for m, w, id_ in zip(data['title'], data['wiki'], data['imdb_id']):
        conversation, i, l = None, -1, -1
        keys = list(all_keys)

        # Draw keys without replacement until one yields a conversation.
        while conversation is None and keys:
            key = ch(keys)
            conversation, i, l = check_if_exists(id_, key)
            keys.remove(key)

        if conversation is not None:
            append_row(m, w, id_, conversation, i, l)
        else:
            did_not.append(id_)
            did_not_rows.append((m, w, id_))

    print(did_not)

    # Fallback rows. The original reused the stale loop variables of the
    # loop above here, which stamped every fallback row with the LAST
    # movie's title/wiki/id and tested the wrong id for reviews/comments.
    for title, wiki_url, movie_id in did_not_rows:
        conversation, i_m, l = check_if_exists(movie_id, 'scene')
        append_row(title, wiki_url, movie_id, conversation, i_m, l)

    print(len(s_wiki))
    print(len(s_chat))
    print(len(s_imdb_id))
    print(did_not)

    d = {
        'chat_1': s_chat[0:25],
        'wiki_1': s_wiki[0:25],
        'imdb_id_1': s_imdb_id[0:25],
        'movie_name_1': s_movie_name[0:25],
        'legend_1': legend[0:25],
        'used_index_1': index_[0:25],
        'chat_2': s_chat[25:],
        'wiki_2': s_wiki[25:],
        'imdb_id_2': s_imdb_id[25:],
        'movie_name_2': s_movie_name[25:],
        'legend_2': legend[25:],
        'used_index_2': index_[25:],
        'plot_1': m_plot[0:25],
        'plot_2': m_plot[25:],
        'review_1': m_review[0:25],
        'review_2': m_review[25:],
        'comment_1': m_comment[0:25],
        'comment_2': m_comment[25:],
    }
    df = pd.DataFrame(d)
    df.to_csv(output_csv, index=False)
    print('Complete')