Beispiel #1
0
def choose_medium_length_string(l, lmin=5, lmax=18):
    """Return a randomly chosen string of "medium" length, or None.

    A string qualifies when its number of whitespace-separated words lies in
    the inclusive range [lmin, lmax].

    Args:
        l: iterable of strings to choose from; None is treated as empty.
        lmin: minimum number of words (inclusive).
        lmax: maximum number of words (inclusive).

    Returns:
        One qualifying string picked with ``ch`` (numpy's random choice), or
        None when no string qualifies.
    """
    if l is None:
        return None
    # Materialise the candidates as a list: on Python 3 ``filter`` returns a
    # lazy iterator, which supports neither ``len()`` nor random choice.
    good_strings = [s for s in l if lmin <= len(s.split()) <= lmax]
    if not good_strings:
        return None
    return ch(good_strings)
def begin_augmentation(conversation):
    """Try augmentation topics in random order until the conversation grows.

    The movie title is obtained by stripping the fixed opening phrase from the
    first turn and is mapped to a movie id through ``movie_list``. Topics are
    drawn one at a time with ``ch`` and handed to ``check_if_exists``; each
    topic is tried at most once. The loop stops as soon as the returned
    conversation has a different length than the input. If every topic was
    tried without a change, the module-level ``count`` is incremented.

    Returns the (possibly unchanged) conversation.
    """
    global count
    remaining_topics = [
        'scene', 'character', 'opinion', 'critical_review', 'movie_trivia'
    ]
    initial_turns = len(conversation)
    title = re.sub("<b>You:</b> Umm, let's talk about ", '', conversation[0])
    movie_id = movie_list[title]

    while True:
        if len(conversation) != initial_turns:
            # An augmentation was applied; nothing more to do.
            break
        if not remaining_topics:
            # Every topic failed for this movie: record the miss.
            count = count + 1
            break
        topic = ch(remaining_topics)
        conversation = check_if_exists(movie_id, conversation, topic)
        remaining_topics.remove(topic)

    return conversation
Beispiel #3
0
def check_if_exists(movie_id, key):
    """Build an opening statement of the requested kind for a movie, if possible.

    Called directly to produce the augmentation for a topic. Depending on
    ``key`` the text comes from the movie record in ``movies``, from the
    fallback ``extra_data`` entry, or from ``questions``.

    Returns:
        A tuple ``(conversation, index, legend)``:
          * conversation: result of ``statement_processing``, or None when the
            topic is unknown or no data is available for this movie.
          * index: position of the chosen item in its source list, or -1 when
            nothing was chosen or the text came from ``extra_data``.
          * legend: numeric code of the topic (scene 2, opinion 3, character 4,
            remember 5, question 7, critical_review 8), or -1 on failure.

    The 'movie_trivia' topic is not handled here and yields the failure tuple.
    """
    nothing = (None, -1, -1)
    used_index = -1

    def _draw(pool):
        # Pick a random position in ``pool`` and return (position, element).
        position = ch(np.arange(len(pool)))
        return position, pool[position]

    if key == 'scene':
        scenes = movies[movie_id].fav_scene
        if scenes:
            used_index, chosen = _draw(scenes)
        elif movie_id in extra_data:
            chosen = extra_data[movie_id]['fav_scene']
        else:
            return nothing
        opener = ch(fav_scene_open_statements)
        return statement_processing(opener, chosen), used_index, 2

    if key == 'character':
        characters = movies[movie_id].fav_character
        if characters:
            used_index, chosen = _draw(characters)
        elif movie_id in extra_data:
            chosen = extra_data[movie_id]['fav_character']
        else:
            return nothing
        opener = ch(fav_character_open_statements)
        return statement_processing(opener, chosen), used_index, 4

    if key == 'critical_review':
        critical_reviews = movies[movie_id].critical_reviews
        selected_review = choose_medium_length_string(critical_reviews)
        # Only reviews that start with a lowercase letter are usable, because
        # they are appended to a fixed sentence prefix.
        if selected_review is None or not selected_review[0].islower():
            return nothing
        processed_review = "This movie is " + selected_review
        conversation = statement_processing(ch(review_intro_statements),
                                            processed_review)
        return conversation, critical_reviews.index(selected_review), 8

    if key == 'opinion':
        opinions = movies[movie_id].opinion
        if opinions:
            used_index, chosen = _draw(opinions)
        elif movie_id in extra_data:
            chosen = extra_data[movie_id]['opinion']
        else:
            return nothing
        opener = ch(opinion_open_statements)
        return statement_processing(opener, chosen), used_index, 3

    if key == 'question':
        if movie_id in questions and questions[movie_id]:
            used_index, chosen = _draw(questions[movie_id])
            return statement_processing(chosen), used_index, 7
        return nothing

    if key == 'remember':
        memories = movies[movie_id].do_you_remember
        if memories:
            used_index, chosen = _draw(memories)
        elif movie_id in extra_data:
            chosen = extra_data[movie_id]['do_you_remember']
        else:
            return nothing
        return statement_processing(chosen), used_index, 5

    return nothing
Beispiel #4
0
def main():
    """Generate one self-chat batch CSV for a random sample of movies.

    Reads the movie table from ``fname_input``, shuffles the row order and
    keeps the first ``int(sys.argv[2])`` rows. For each sampled movie the
    topics 'scene', 'character' and 'opinion' are tried in random order via
    ``check_if_exists`` until one yields a conversation. Movies for which a
    conversation was found contribute one row (chat, wiki link, plot, review,
    comments and bookkeeping columns); the ids of the others are printed.

    The result is written to ``'self_chat_batch_<ind>.csv'`` where ``ind`` is
    a module-level value.
    """
    data = pd.read_csv(fname_input)  # change file_name here
    order = np.arange(len(data))
    np.random.shuffle(order)
    sample = order[0:int(sys.argv[2])]  # number of movies you want to give
    movie_name = np.array(data['title'])[sample]
    imdb_id = np.array(data['imdb_id'])[sample]

    s_movie_name = []
    s_wiki = []
    s_imdb_id = []
    s_chat = []
    did_not = []
    legend = []
    index_ = []
    m_plot = []
    m_review = []
    m_comment = []
    s_rev_lengend = []

    for m, id_ in zip(movie_name, imdb_id):
        conversation = None
        keys = ['scene', 'character', 'opinion']
        # Try each topic at most once, in random order, until one succeeds.
        # Identity comparison is used because ``== None`` can be overridden
        # by the conversation object's own ``__eq__``.
        while conversation is None and keys:
            key = ch(keys)
            conversation, i, l = check_if_exists(id_, key)
            keys.remove(key)

        if conversation is None:
            # No topic produced anything for this movie.
            did_not.append(id_)
            continue

        s_wiki.append('https://en.wikipedia.org/?curid=' +
                      str(movies[id_].wiki_id))
        s_chat.append(conversation)
        s_imdb_id.append(id_)
        s_movie_name.append(m)
        index_.append(i)
        legend.append(l)
        m_plot.append(movies[id_].plot)

        # Comment column: reddit singles followed by review titles, as HTML.
        temp = ""
        if id_ in reddit_singles:
            temp = temp + change_to_html(reddit_singles[id_])
        if id_ in review_titles:
            temp = temp + change_to_html(review_titles[id_])
        m_comment.append(temp)

        # Review column uses the review at position 1; the legend records
        # which review position was used, or -1 when the movie has none.
        if id_ in reviews:
            m_review.append(convert_to_ascii(reviews[id_][1]))
            s_rev_lengend.append(1)
        else:
            m_review.append("")
            s_rev_lengend.append(-1)

    print(did_not)

    m_plot, s_plot_legend = plot_array(m_plot)

    print(len(s_wiki))

    d = {
        'comment_1': m_comment,
        'plot_1': m_plot,
        'review_1': m_review,
        'chat_1': s_chat,
        'wiki_1': s_wiki,
        'imdb_id_1': s_imdb_id,
        'movie_name_1': s_movie_name,
        'legend_1': legend,
        'used_index_1': index_,
        'review_legend_1': s_rev_lengend,
        'plot_legend_1': s_plot_legend
    }
    df = pd.DataFrame(d)
    df.to_csv('self_chat_batch_' + str(ind) + '.csv',
              index=False,
              encoding='utf-8')
Beispiel #5
0
from numpy.random import choice as ch

import sys

"""

This code encrypts a string into another string, following these steps:

- first the string is sliced into cubic strings of 8 vertices each (as many as needed). 

The letters are successively placed at vertices following the coordinates:

(0,0,0)->(0,1,0)->(1,1,0)->(1,0,0)->(0,0,1)->(0,1,1)->(1,1,1)->(1,0,1)

- then the cubes are randomly rotated, a random number of times, independently, up, down, left or right.

- the resulting cube positions are being read following the same path, producing the encrypted string.

- the code also embeds the reverse - decrypting - function.

"""

# Sample plaintext. NOTE(review): presumably the input of the cube cipher
# described in the string above; its use is not visible in this excerpt.
text = "I aml"

# Sample ciphertext. NOTE(review): presumably meant for the decrypting
# direction; confirm against the code that reads it.
encrypted = "t I oamoie tllingt  enoti  ed"

# Key string, empty here. NOTE(review): how the key is filled in or consumed
# is not shown in this excerpt.
key = ""

# INPUT instructions:
def opinion_augmentation(movie_id, conversation, breakpoint):
    """Build an opener/opinion pair of turns and hand them to the augmenter.

    The first turn is a random opinion opener prefixed for position
    ``breakpoint``; the second is a random opinion of the movie prefixed for
    position ``breakpoint + 1``. Both are passed, in that order, to
    ``augment_the_conversation`` together with ``conversation`` and
    ``breakpoint``; its result is returned.
    """
    opener_turn = add_pre_template(breakpoint) + ch(opinion_open_statements)
    opinion_turn = (add_pre_template(breakpoint + 1) +
                    ch(movies[movie_id].opinion))
    new_turns = [opener_turn, opinion_turn]
    return augment_the_conversation(conversation, breakpoint, new_turns)
def movie_review_augmentation(movie_id, conversation, breakpoint, selected_review, s=" "):
    """Build a review turn plus an accepting reply and hand them to the augmenter.

    The review turn is a random review intro followed by ``selected_review``,
    prefixed for position ``breakpoint``; the reply is a random acceptance
    phrase prefixed for position ``breakpoint + 1``. The incoming values of
    ``movie_id`` and ``s`` are not used.

    Returns the result of ``augment_the_conversation``.
    """
    prefix = add_pre_template(breakpoint)
    review_turn = prefix + ch(review_intro_statements) + selected_review
    reply_turn = add_pre_template(breakpoint + 1) + ch(mid_review_accept)
    return augment_the_conversation(conversation, breakpoint,
                                    [review_turn, reply_turn])
def movie_trivia_augmentation(movie_id, conversation, breakpoint, trivia_chosen, s=" "):
    """Build a trivia turn plus an accepting reply and hand them to the augmenter.

    The trivia turn is a random trivia opener followed by ``trivia_chosen``,
    prefixed for position ``breakpoint``; the reply is a random acceptance
    phrase prefixed for position ``breakpoint + 1``. The incoming values of
    ``movie_id`` and ``s`` are not used.

    Returns the result of ``augment_the_conversation``.
    """
    prefix = add_pre_template(breakpoint)
    trivia_turn = prefix + ch(mid_trivia_open_statements) + trivia_chosen
    reply_turn = add_pre_template(breakpoint + 1) + ch(mid_trivia_accept)
    # Original author's note: change to s after first batch.
    return augment_the_conversation(conversation, breakpoint,
                                    [trivia_turn, reply_turn])
def fav_character_augmentation(movie_id, conversation, breakpoint):
    """Build an opener/favourite-character pair of turns for the augmenter.

    The character turn (prefixed for position ``breakpoint + 1``) is drawn
    before the opener turn (prefixed for position ``breakpoint``), but the
    opener is placed first in the list passed to ``augment_the_conversation``.

    Returns the result of ``augment_the_conversation``.
    """
    character_turn = (add_pre_template(breakpoint + 1) +
                      ch(movies[movie_id].fav_character))
    opener_turn = (add_pre_template(breakpoint) +
                   ch(fav_character_open_statements))
    new_turns = [opener_turn, character_turn]
    return augment_the_conversation(conversation, breakpoint, new_turns)
Beispiel #10
0
def ee_stat_gen_age():
    """Draw a random integer from a randomly selected range.

    First an index into ``EE_stat_age_dist`` is drawn using the probabilities
    in ``EE_stat_age_prob``. Then one integer is drawn from the span that runs
    from that entry's first element to its second element, inclusive.
    """
    bracket_count = len(EE_stat_age_dist)
    bracket = ch(np.arange(bracket_count), p=EE_stat_age_prob)
    lowest = EE_stat_age_dist[bracket][0]
    highest = EE_stat_age_dist[bracket][1]
    candidates = np.arange(lowest, highest + 1)
    return ch(candidates)
Beispiel #11
0
def main():
    """Build the augmented-start batch for the movie sheet and write it to CSV.

    For every row of the input sheet the topics 'scene', 'character',
    'opinion', 'critical_review', 'question' and 'remember' are tried in
    random order via ``check_if_exists`` until one yields a conversation.
    Movies for which no topic worked are collected, printed, and then added
    in a second pass using the 'scene' topic, so they still get a row.

    The collected columns are split into a first group of 25 rows and a
    second group with the remainder, and written to
    ``'augmented_start_batch_50_2_1.csv'``.
    """
    data = pd.read_csv('/home/nikita/Downloads/50 movies - Sheet2.csv'
                       )  # change file_name here
    movie_name = data['title']
    wiki = data['wiki']
    imdb_id = data['imdb_id']
    s_movie_name = []
    s_wiki = []
    s_imdb_id = []
    s_chat = []
    did_not = []  # imdb ids without a conversation (printed below)
    unmatched = []  # (title, wiki, imdb id) of those same movies
    legend = []
    index_ = []
    m_plot = []
    m_review = []
    m_comment = []

    def add_row(title, wiki_url, movie_id, conversation, used_index, code):
        # Append one output row; every lookup uses this movie's own id.
        s_wiki.append(wiki_url)
        s_chat.append(conversation)
        s_imdb_id.append(movie_id)
        s_movie_name.append(title)
        index_.append(used_index)
        legend.append(code)
        m_plot.append(movies[movie_id].plot)
        temp = ""
        if movie_id in reddit_singles:
            temp = temp + change_to_html(reddit_singles[movie_id])
        if movie_id in review_titles:
            temp = temp + change_to_html(review_titles[movie_id])
        m_comment.append(temp)
        if movie_id in reviews:
            m_review.append(convert_to_ascii(reviews[movie_id][0]))
        else:
            m_review.append("")

    for m, w, id_ in zip(movie_name, wiki, imdb_id):
        conversation = None
        keys = [
            'scene', 'character', 'opinion', 'critical_review', 'question',
            'remember'
        ]
        # Try each topic at most once, in random order, until one succeeds.
        while conversation is None and keys:
            key = ch(keys)
            conversation, i, l = check_if_exists(id_, key)
            keys.remove(key)

        if conversation is None:
            did_not.append(id_)
            # Keep the movie's own title and wiki link for the second pass;
            # the loop variables will have moved on by then.
            unmatched.append((m, w, id_))
            continue

        add_row(m, w, id_, conversation, i, l)

    print(did_not)

    # Second pass: give the unmatched movies a row using the 'scene' topic.
    # NOTE(review): 'scene' was already tried for these movies in the loop
    # above, so the conversation is presumably still None here - confirm
    # whether a different fallback was intended.
    for m, w, id_ in unmatched:
        conversation, i_m, l = check_if_exists(id_, 'scene')
        add_row(m, w, id_, conversation, i_m, l)

    print(len(s_wiki))
    print(len(s_chat))
    print(len(s_imdb_id))
    print(did_not)

    # NOTE: pandas requires all columns of a dict-built DataFrame to have the
    # same length, so the 25 / remainder split below only works when exactly
    # 50 rows were collected.
    d = {
        'chat_1': s_chat[0:25],
        'wiki_1': s_wiki[0:25],
        'imdb_id_1': s_imdb_id[0:25],
        'movie_name_1': s_movie_name[0:25],
        'legend_1': legend[0:25],
        'used_index_1': index_[0:25],
        'chat_2': s_chat[25:],
        'wiki_2': s_wiki[25:],
        'imdb_id_2': s_imdb_id[25:],
        'movie_name_2': s_movie_name[25:],
        'legend_2': legend[25:],
        'used_index_2': index_[25:],
        'plot_1': m_plot[0:25],
        'plot_2': m_plot[25:],
        'review_1': m_review[0:25],
        'review_2': m_review[25:],
        'comment_1': m_comment[0:25],
        'comment_2': m_comment[25:]
    }

    df = pd.DataFrame(d)
    df.to_csv('augmented_start_batch_50_2_1.csv', index=False)
    print('Complete')