"""Smoke-test script: train a BiGRU HaRe model on 10,000 identical synthetic
conversations and visualize the resulting status history."""

from hare import Hare, Conversation
from hare.tensorflowbrain import BiGruBrain

mockhare = Hare()
mockhare.brain = BiGruBrain()

# Build a large, fully synthetic corpus. Every conversation is identical:
# speaker 'b' is labeled positive (1), so the model has a trivial signal.
for _ in range(10000):
    convo = Conversation()
    convo.add_utterance(speaker='a', content='c c c c c')
    convo.add_utterance(speaker='b', content='c c c c c')
    convo.add_utterance(speaker='b', content='c c c c b')
    convo.add_utterance(speaker='a', content='c c c c a')
    convo.label_speaker('b', 1)
    mockhare.add_conversation(convo)

mockhare.train()
mockhare.visualize_history_for_conversation()
# NOTE(review): script fragment — CONVERSATION_HISTORY_FILES, THRESHOLDS,
# CONVERSATION_LENGTH, CONVERSATIONS_FILE, loads (presumably json.loads),
# Hare and Conversation are defined in an unseen part of the full script, and
# the final conversation-parsing loop continues past this chunk.
# Builds one Hare per threshold, all sharing one precomputed status history
# list per history file, then begins reading the raw conversations file.
# NOTE(review): `Hare(name=threshold)` passes a float as the name, while the
# otherwise-identical fragment elsewhere in this file uses
# `conv_hist_file + ':' + str(threshold)` — likely unintended; confirm.
#Add the precalculated conversation history to hare objects hares = [] for conv_hist_file in CONVERSATION_HISTORY_FILES: status_per_conversation = [] for n, line in enumerate(open(conv_hist_file)): status_per_conversation.append(loads(line)[:CONVERSATION_LENGTH]) if n % 100 == 0: print(conv_hist_file, n) for threshold in THRESHOLDS: h = Hare(name=threshold) h.status_per_conversation = status_per_conversation h.cut_off_value = threshold hares.append(h) #Load the conversations conversations = [] current_conversation = Conversation() for line in open(CONVERSATIONS_FILE): line = line.strip() if len(line) == 0: continue
"""Train a minimal BiGRU HaRe model on a single two-utterance conversation,
save it as a pretrained model, and visualize its status history."""

from hare import Hare, Conversation
from hare.brain import BiGruBrain

# Configure the brain with GloVe 50d embeddings and verbose training output.
brain = BiGruBrain()
brain.embedding_location = '/vol/bigdata/word_embeddings/glove/glove.6B.50d.txt'
brain.verbose = True

hare = Hare()
hare.brain = brain

# One toy conversation; speaker 'a' is labeled positive (1).
convo = Conversation()
convo.add_utterance(speaker='a', content='hate you')
convo.add_utterance(speaker='b', content='i love you')
convo.label_speaker('a', 1)
hare.add_conversation(convo)

hare.train()
hare.save('/vol/tensusers2/wstoop/HaRe/hare/pretrained/simple')
hare.update_status_history_for_conversation()
hare.visualize_history_for_conversation()
# NOTE(review): script fragment — starts with a bare `elif` belonging to a
# conversation-parsing loop whose `if` branch lives in an unseen chunk;
# NR_OF_CONVERSATIONS, Hare, Conversation and BiGruBrain are defined there too.
# Lines starting with '#' mark a labeled speaker and close the current
# conversation (IndexError on a '#' line without a speaker token is skipped);
# other lines are tab-separated "speaker<TAB>content" utterances. Afterwards
# all parsed conversations are loaded into a Hare with a downsampling
# BiGruBrain (max sequence length 500), trained, and saved as 'moba'.
elif line[0] == '#': try: current_conversation.label_speaker(line.split()[1], 1) except IndexError: continue conversations.append(current_conversation) current_conversation = Conversation() if len(conversations) % 100 == 0: print(len(conversations)) if len(conversations) == NR_OF_CONVERSATIONS: break continue speaker, content = line.split('\t') current_conversation.add_utterance(speaker, content) #Add to a hare object moba_hare = Hare() for conversation in conversations: moba_hare.add_conversation(conversation) moba_hare.brain = BiGruBrain() moba_hare.brain.downsampling = True moba_hare.brain._max_sequence_length = 500 moba_hare.train() moba_hare.save('moba')
"""Score held-out LoL conversations with a dictionary-based HaRe detector.

Loads up to NR_OF_CONVERSATIONS conversations, runs a DictBasedBrain with a
fixed bad-word list over each one, and writes the per-conversation status
history to the file 'moba_dic'.
"""

from sys import argv

from hare import Hare
from hare.brain import DictBasedBrain
from hare.conversation import import_conversations

# Candidate cut-off thresholds (presumably consumed later in the full script).
THRESHOLDS = [0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75]
CONVERSATIONS_FILE = '../datasets/LoL/heldout_conversations_anon.txt'
NR_OF_CONVERSATIONS = 1000

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)[:NR_OF_CONVERSATIONS]

print('Loading pretrained model')
exp_hare = Hare(name='dict_based')
exp_hare.brain = DictBasedBrain()
exp_hare.brain.bad_words = [
    'f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck', 'noob', 'newb',
    'n00b', 'f*g', 'loser',
]

# FIX: the original opened 'moba_dic' and never closed it, risking lost
# (unflushed) output if the script exits early. A context manager guarantees
# the file is flushed and closed even when processing raises.
with open('moba_dic', 'w') as result_file:
    for n, conversation in enumerate(conversations):
        print('Processing conv', n)
        exp_hare.add_conversation(conversation)
        exp_hare.save_history_for_conversation(result_file, n)
# NOTE(review): script fragment — CONVERSATION_HISTORY_FILES, THRESHOLDS,
# CONVERSATION_LENGTH, CONVERSATIONS_FILE, loads (presumably json.loads),
# Hare and Conversation come from an unseen part of the full script, and the
# final conversation-parsing loop continues past this chunk.
# For each precomputed history file, truncates each conversation's status
# history to CONVERSATION_LENGTH and creates one Hare per threshold, named
# "<file>:<threshold>"; then begins reading the raw conversations file.
#Add the precalculated conversation history to hare objects hares = [] for conv_hist_file in CONVERSATION_HISTORY_FILES: status_per_conversation = [] for n, line in enumerate(open(conv_hist_file)): status_per_conversation.append(loads(line)[:CONVERSATION_LENGTH]) if n % 100 == 0: print(conv_hist_file, n) for threshold in THRESHOLDS: h = Hare(name=conv_hist_file + ':' + str(threshold)) h.status_per_conversation = status_per_conversation h.cut_off_value = threshold hares.append(h) #Load the conversations conversations = [] current_conversation = Conversation() for line in open(CONVERSATIONS_FILE): line = line.strip() if len(line) == 0: continue
"""Train a BiGRU HaRe model (with casing information) on the LoL training
conversations and save it under the name 'm05'."""

from hare import Hare, Conversation
from hare.tensorflowbrain import LSTMBrain, BiGruBrain
from hare.conversation import import_conversations
from hare.embedding import load_embedding_dictionary

# Locations of the training data.
DATA_ROOT = '../datasets/LoL/'
CONVERSATIONS_FILE = DATA_ROOT + 'train_conversations_anon.txt'

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)

# Load every conversation into a fresh Hare object.
moba_hare = Hare()
for conversation in conversations:
    moba_hare.add_conversation(conversation)

# Configure the BiGRU brain before attaching it.
brain = BiGruBrain()
brain.embedding_location = DATA_ROOT + 'train_toxic_embeddings'
brain.verbose = True
brain.downsampling = True
brain.learning_epochs = 10
brain._max_sequence_length = 500
brain.include_casing_information = True

moba_hare.brain = brain
moba_hare.train()
moba_hare.save('m05')
# NOTE(review): script fragment — thresholds, conv_hist_file, OUTPUT_FOLDER,
# rmtree/mkdir (presumably shutil.rmtree / os.mkdir), Hare,
# status_per_conversation, conversations, BETA_VALUES and CONVERSATION_LENGTH
# are defined in unseen parts of the full script, and the final `for` header
# is cut off mid-loop. The leading "# to do met conversation.remove_speaker()"
# is a mixed Dutch/English TODO ("met" = "with"): i.e. "TODO with
# conversation.remove_speaker()".
# For each threshold: recreates an output folder "<OUTPUT_FOLDER><file>@<thr>/"
# (FileNotFoundError from rmtree on a missing folder is deliberately ignored),
# builds a named Hare sharing the precomputed histories, then initializes
# per-detector metric accumulators (tp, fp, fbeta per beta value).
# to do met conversation.remove_speaker() #Create a hare object for each threshold for threshold in thresholds: detector_name = conv_hist_file + '@' + str(threshold) folder_name = OUTPUT_FOLDER + detector_name + '/' try: rmtree(folder_name) except FileNotFoundError: pass mkdir(folder_name) h = Hare(name=detector_name) h.status_per_conversation = status_per_conversation h.cut_off_value = threshold for conversation in conversations: h.add_conversation(conversation) hares.append(h) per_player = [] tp = [] fp = [] fbeta = {b: [] for b in BETA_VALUES} #Calculate metrics for this detector/threshold combi for utterance_index in range(CONVERSATION_LENGTH):
# NOTE(review): script fragment — starts mid-branch inside a
# conversation-parsing loop from an unseen chunk; NR_OF_CONVERSATIONS,
# DOWNSAMPLE_RATIOS, TRAINING_SIZES, Hare, Conversation and BiGruBrain are
# defined elsewhere. After parsing, runs a grid of experiments: for each
# (downsample_ratio, training_size) pair it trains a downsampling BiGruBrain
# (max sequence length 500) on the first training_size conversations and
# saves the model as 'moba_<ratio>_<size>'.
current_conversation = Conversation() if len(conversations) % 100 == 0: print(len(conversations)) if len(conversations) == NR_OF_CONVERSATIONS: break continue speaker, content = line.split('\t') current_conversation.add_utterance(speaker, content) #Add to a hare object for downsample_ratio in DOWNSAMPLE_RATIOS: for training_size in TRAINING_SIZES: print('===', 'training', downsample_ratio, training_size, '===') exp_hare = Hare() for conversation in conversations[:training_size]: exp_hare.add_conversation(conversation) exp_hare.brain = BiGruBrain() exp_hare.brain.downsampling = True exp_hare.brain.downsampling_ratio = downsample_ratio exp_hare.brain._max_sequence_length = 500 exp_hare.train() exp_hare.save('moba_' + str(downsample_ratio) + '_' + str(training_size))
"""Demo script: score a three-utterance conversation with a RandomBrain,
print the status history, then print the retrospective accuracy."""

from hare import Hare, Conversation
from hare.brain import RandomBrain

random_hare = Hare()
random_hare.brain = RandomBrain()

# The conversation is registered first and grows afterwards; utterances are
# added through the same Conversation object the Hare already holds.
convo = Conversation()
random_hare.add_conversation(convo)
convo.add_utterance(speaker='a', content='hello')
convo.add_utterance(speaker='b', content='hello')
convo.add_utterance(speaker='a', content='how is everyone doing?')

random_hare.update_status_history_for_conversation(0)
print(random_hare.status_per_conversation[0])

# Label speaker 'b' (score 0.9) only after scoring, then evaluate.
convo.label_speaker('b', 0.9)
retro_accuracy = random_hare.calculate_retrospective_accuracy()
print(retro_accuracy)
# NOTE(review): script fragment — conversations, DETECTOR_THRESHOLDS,
# open_conversation_history_files, loads (presumably json.loads), all_scores,
# headers, Hare, INTERESTING_POINTS_IN_CONVERSATION and fbeta_score are
# defined in unseen parts of the full script. For every conversation and
# every detector, reads one precomputed history line, then for each threshold
# builds a throwaway Hare and records the F-beta score (beta=1) at each
# interesting utterance index; header labels "<detector>:<thr>:<point>" are
# collected only on the first conversation.
for convo_index, conversation in enumerate(conversations): #For this conversation, we want to know the conversation history for every detector for detector, thresholds in DETECTOR_THRESHOLDS.items(): conversation_history = loads(open_conversation_history_files[detector].readline()) scores_for_this_conversation = [] #We check what scores this means for every threshold for threshold in thresholds: if threshold not in all_scores[detector].keys(): all_scores[detector][threshold] = [] h = Hare() h.add_conversation(conversation) h.status_per_conversation = [conversation_history] h.cut_off_value = threshold for point_in_conversation in INTERESTING_POINTS_IN_CONVERSATION: true, predicted = h.get_true_and_predicted_scores_at_utterance_index(point_in_conversation,categorize_predicted_scores=True) score = fbeta_score(true, predicted, 1) scores_for_this_conversation.append(score) if convo_index == 0: headers.append(detector+':'+str(threshold)+':'+str(point_in_conversation)) all_scores[detector][threshold].append(scores_for_this_conversation[-len(INTERESTING_POINTS_IN_CONVERSATION):])
# NOTE(review): script fragment — CONVERSATION_HISTORY_FILES, loads
# (presumably json.loads), CONVERSATIONS_FILE, Hare and Conversation come
# from unseen parts of the full script, and the chunk ends with a dangling
# `elif line[0] == '#':` whose body is in another chunk.
# Unlike the per-threshold variants elsewhere in this file, this version
# builds exactly one Hare per history file (no cut_off_value set), with each
# conversation's status history truncated to CONVERSATION_LENGTH = 200.
CONVERSATION_LENGTH = 200 #Add the precalculated conversation history to hare objects hares = [] for conv_hist_file in CONVERSATION_HISTORY_FILES: status_per_conversation = [] for n, line in enumerate(open(conv_hist_file)): status_per_conversation.append(loads(line)[:CONVERSATION_LENGTH]) if n % 100 == 0: print(conv_hist_file, n) h = Hare(name=conv_hist_file) h.status_per_conversation = status_per_conversation hares.append(h) #Load the conversations conversations = [] current_conversation = Conversation() for line in open(CONVERSATIONS_FILE): line = line.strip() if len(line) == 0: continue elif line[0] == '#':
"""Train a BiGRU HaRe model with pretrained toxic embeddings on the LoL
training conversations and save it as 'moba_bigru_embedding'."""

from hare import Hare, Conversation
from hare.tensorflowbrain import LSTMBrain, BiGruBrain
from hare.conversation import import_conversations
from hare.embedding import load_embedding_dictionary

# Load the conversations.
CONVERSATIONS_FILE = 'datasets/LoL/train_conversations_anon.txt'

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)

# Register every conversation with a fresh Hare object.
moba_hare = Hare()
for conversation in conversations:
    moba_hare.add_conversation(conversation)

# Attach and configure the brain directly on the Hare.
moba_hare.brain = BiGruBrain()
moba_hare.brain.embedding_location = 'datasets/LoL/train_toxic_embeddings'
moba_hare.brain.verbose = True
moba_hare.brain.downsampling = True
moba_hare.brain.learning_epochs = 10
moba_hare.brain._max_sequence_length = 500

moba_hare.train()
moba_hare.save('moba_bigru_embedding')
"""Visualize retrospective precision/recall for a dictionary-based detector
run over the packaged example conversations."""

from hare import load_pretrained, load_example_conversations
from hare.visualize import visualize_retrospective_precision_and_recall
from hare import Hare
from hare.brain import DictBasedBrain

moba_hare = Hare()
moba_hare.brain = DictBasedBrain()

# Feed every bundled example conversation into the detector.
for conversation in load_example_conversations():
    moba_hare.add_conversation(conversation)

# Candidate cut-off thresholds (not consumed in this visible chunk).
THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

visualize_retrospective_precision_and_recall(moba_hare)