from hare import Hare, Conversation
from hare.tensorflowbrain import BiGruBrain

mockhare = Hare()
mockhare.brain = BiGruBrain()

for i in range(10000):
    convo = Conversation()
    convo.add_utterance(speaker='a', content='c c c c c')
    convo.add_utterance(speaker='b', content='c c c c c')
    convo.add_utterance(speaker='b', content='c c c c b')
    convo.add_utterance(speaker='a', content='c c c c a')
    convo.label_speaker('b', 1)
    mockhare.add_conversation(convo)

mockhare.train()
mockhare.visualize_history_for_conversation()
#Dictionary-based baseline on the held-out LoL conversations; writes the score history per conversation to 'moba_dic'
from sys import argv
from hare import Hare
from hare.brain import DictBasedBrain
from hare.conversation import import_conversations

THRESHOLDS = [0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75]
CONVERSATIONS_FILE = '../datasets/LoL/heldout_conversations_anon.txt'
NR_OF_CONVERSATIONS = 1000

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)[:NR_OF_CONVERSATIONS]

print('Setting up dictionary-based model')
exp_hare = Hare(name='dict_based')
exp_hare.brain = DictBasedBrain()
exp_hare.brain.bad_words = ['f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck',
                            'noob', 'newb', 'n00b', 'f*g', 'loser']

result_file = open('moba_dic', 'w')

for n, conversation in enumerate(conversations):
    print('Processing conv', n)
    exp_hare.add_conversation(conversation)
    exp_hare.save_history_for_conversation(result_file, n)
from hare import Hare, Conversation
from hare.brain import BiGruBrain

brain: BiGruBrain = BiGruBrain()
brain.embedding_location = '/vol/bigdata/word_embeddings/glove/glove.6B.50d.txt'
brain.verbose = True

hare = Hare()
hare.brain = brain

convo = Conversation()
convo.add_utterance(speaker='a', content='hate you')
convo.add_utterance(speaker='b', content='i love you')
convo.label_speaker('a', 1)

hare.add_conversation(convo)
hare.train()
hare.save('/vol/tensusers2/wstoop/HaRe/hare/pretrained/simple')

hare.update_status_history_for_conversation()
hare.visualize_history_for_conversation()
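# Hedged sketch (not part of the original scripts): reloading the model saved
# above. `load_pretrained` is only imported, never called, in these scripts, so
# the argument it expects is an assumption; the name 'simple' mirrors the final
# path component passed to save().
from hare import load_pretrained

restored_hare = load_pretrained('simple')  # assumed signature: load by saved name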
# [Fragment] The opening of this script (imports, constants such as
# NR_OF_CONVERSATIONS, and the loop header, presumably iterating over the lines
# of the conversations file) is missing; a hedged sketch of that setup follows
# this script. The code below continues the line-parsing loop: '#' lines close a
# conversation and name the flagged speaker, other lines are tab-separated
# speaker/content pairs.

    elif line[0] == '#':

        try:
            current_conversation.label_speaker(line.split()[1], 1)
        except IndexError:
            continue

        conversations.append(current_conversation)
        current_conversation = Conversation()

        if len(conversations) % 100 == 0:
            print(len(conversations))

        if len(conversations) == NR_OF_CONVERSATIONS:
            break

        continue

    speaker, content = line.split('\t')
    current_conversation.add_utterance(speaker, content)

#Add the parsed conversations to a hare object and train
moba_hare = Hare()
for conversation in conversations:
    moba_hare.add_conversation(conversation)

moba_hare.brain = BiGruBrain()
moba_hare.brain.downsampling = True
moba_hare.brain._max_sequence_length = 500

moba_hare.train()
moba_hare.save('moba')
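# Hedged sketch (not from the original script): a plausible reconstruction of
# the setup and loop header that the fragment above and the later experiment
# fragment assume. The file path is borrowed from the other LoL scripts, the
# conversation cap and blank-line check are guesses, and the library's own
# import_conversations helper (used elsewhere in these scripts) presumably
# performs the same parsing.
from hare import Hare, Conversation
from hare.tensorflowbrain import BiGruBrain

CONVERSATIONS_FILE = '../datasets/LoL/train_conversations_anon.txt'
NR_OF_CONVERSATIONS = 1000  # hypothetical cap, mirroring the dictionary baseline

conversations = []
current_conversation = Conversation()

for line in open(CONVERSATIONS_FILE, encoding='utf-8'):
    line = line.strip()
    if not line:
        continue
    # ... the `elif line[0] == '#':` branch and the tab-separated utterance
    # parsing from the fragment above belong here ...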
from hare import Hare, Conversation
from hare.tensorflowbrain import LSTMBrain, BiGruBrain
from hare.conversation import import_conversations
from hare.embedding import load_embedding_dictionary

#Load the conversations
DATA_ROOT = '../datasets/LoL/'
CONVERSATIONS_FILE = DATA_ROOT + 'train_conversations_anon.txt'

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)

#Add to a hare object
moba_hare = Hare()
for conversation in conversations:
    moba_hare.add_conversation(conversation)

brain = BiGruBrain()
brain.embedding_location = DATA_ROOT + 'train_toxic_embeddings'
brain.verbose = True
brain.downsampling = True
brain.learning_epochs = 10
brain._max_sequence_length = 500
brain.include_casing_information = True

moba_hare.brain = brain

moba_hare.train()
moba_hare.save('m05')
# [Fragment] Like the BiGRU training script above, this script starts mid-way
# through the conversation-parsing loop (see the hedged setup sketch after that
# script). The constants DOWNSAMPLE_RATIOS and TRAINING_SIZES are defined in the
# missing part; hypothetical placeholder values follow this script.

        current_conversation = Conversation()

        if len(conversations) % 100 == 0:
            print(len(conversations))

        if len(conversations) == NR_OF_CONVERSATIONS:
            break

        continue

    speaker, content = line.split('\t')
    current_conversation.add_utterance(speaker, content)

#Add to a hare object and train one model per (downsample ratio, training size) combination
for downsample_ratio in DOWNSAMPLE_RATIOS:
    for training_size in TRAINING_SIZES:

        print('===', 'training', downsample_ratio, training_size, '===')

        exp_hare = Hare()
        for conversation in conversations[:training_size]:
            exp_hare.add_conversation(conversation)

        exp_hare.brain = BiGruBrain()
        exp_hare.brain.downsampling = True
        exp_hare.brain.downsampling_ratio = downsample_ratio
        exp_hare.brain._max_sequence_length = 500

        exp_hare.train()
        exp_hare.save('moba_' + str(downsample_ratio) + '_' + str(training_size))
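# Hedged sketch (not from the original script): the experiment grid the loop
# above expects. The real values are not in the fragment; the ones below are
# hypothetical placeholders for illustration only.
DOWNSAMPLE_RATIOS = [1, 2, 5]    # hypothetical downsampling ratios
TRAINING_SIZES = [1000, 5000]    # hypothetical numbers of training conversations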
from hare import Hare, Conversation
from hare.brain import RandomBrain

random_hare = Hare()
random_hare.brain = RandomBrain()

convo = Conversation()
random_hare.add_conversation(convo)

convo.add_utterance(speaker='a', content='hello')
convo.add_utterance(speaker='b', content='hello')
convo.add_utterance(speaker='a', content='how is everyone doing?')

random_hare.update_status_history_for_conversation(0)
print(random_hare.status_per_conversation[0])

convo.label_speaker('b', 0.9)

acc = random_hare.calculate_retrospective_accuracy()
print(acc)
from hare import load_pretrained, load_example_conversations
from hare.visualize import visualize_retrospective_precision_and_recall
from hare import Hare
from hare.brain import DictBasedBrain

moba_hare = Hare()
moba_hare.brain = DictBasedBrain()

for conversation in load_example_conversations():
    moba_hare.add_conversation(conversation)

THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

visualize_retrospective_precision_and_recall(moba_hare)