# NOTE(review): incomplete fragment. The '}' below closes a dict literal
# (presumably `all_detectors`) whose opening is outside this view, and the
# bare `break` implies an enclosing loop (likely `while True:`) that is also
# not visible. Formatting restored from a whitespace-mangled one-liner;
# tokens are unchanged.
}

# Work out which detectors do not yet have a result file on disk.
# assumes RESULT_LOCATION contains one file per finished detector — TODO confirm
all_result_files = set(listdir(RESULT_LOCATION))
todo = set(all_detectors.keys()) - all_result_files
if len(todo) == 0:
    print('There are result files for all detectors. Stopping')
    break  # leaves the enclosing (unseen) loop once everything is done
print('Detectors without result files:', ', '.join(todo))

# Pick an arbitrary remaining detector (set order is unspecified).
detector = list(todo)[0]
print('Doing', detector)

# Lazily load the conversations only on the first iteration.
if len(conversations) == 0:
    print('Importing conversations')
    conversations = import_conversations(CONVERSATIONS_FILE)

print('Loading pretrained model')
if detector in ['m04']:
    # 'm04' is the dictionary-based baseline: no pretrained model, just a
    # hard-coded bad-word list.
    exp_hare = Hare(name='dict_based')
    exp_hare.brain = DictBasedBrain()
    exp_hare.brain.bad_words = [
        'f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck', 'noob', 'newb',
        'n00b', 'f*g', 'loser'
    ]
else:
    # All other detectors load a serialized model from disk.
    # assumes all_detectors maps detector id -> model filename — TODO confirm
    exp_hare = load_pretrained(PRETRAINED_MODELS_LOCATION + all_detectors[detector])

# NOTE(review): this handle is opened without a context manager; whether it is
# closed depends on code past this view — verify it is closed or flushed.
result_file = open(RESULT_LOCATION + detector, 'w')
"""Run the dictionary-based HaRe detector over held-out LoL conversations.

Loads the first NR_OF_CONVERSATIONS conversations, scores each with a
DictBasedBrain driven by a fixed bad-word list, and writes the per-message
detection history for every conversation to the file 'moba_dic'.
"""
from sys import argv

from hare import Hare
from hare.brain import DictBasedBrain
from hare.conversation import import_conversations

# NOTE(review): THRESHOLDS and argv are unused in this script as shown;
# kept in case downstream tooling or a later revision relies on them.
THRESHOLDS = [0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75]
CONVERSATIONS_FILE = '../datasets/LoL/heldout_conversations_anon.txt'
NR_OF_CONVERSATIONS = 1000

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)[:NR_OF_CONVERSATIONS]

print('Loading pretrained model')
# Dictionary-based baseline: flags messages containing any hard-coded bad word.
exp_hare = Hare(name='dict_based')
exp_hare.brain = DictBasedBrain()
exp_hare.brain.bad_words = [
    'f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck', 'noob', 'newb',
    'n00b', 'f*g', 'loser'
]

# FIX: the original opened 'moba_dic' and never closed it; a context manager
# guarantees the result file is flushed and closed even if processing raises.
with open('moba_dic', 'w') as result_file:
    for n, conversation in enumerate(conversations):
        print('Processing conv', n)
        exp_hare.add_conversation(conversation)
        exp_hare.save_history_for_conversation(result_file, n)
# NOTE(review): incomplete fragment. The entry below is the tail of a dict
# literal (presumably CONVERSATION_HISTORY_FILES_WITH_THRESHOLDS) whose
# opening is outside this view, and the final `for` loop's body is cut off at
# the end of the view. Formatting restored from a whitespace-mangled
# one-liner; tokens are unchanged.
    'moba_bigru_embeddings_100': [
        0.001, 0.0025, 0.005, 0.0075, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25,
        0.5, 0.75, 1
    ]
}

# Beta values for F-beta style evaluation — presumably consumed by code past
# this view; verify against the loop body.
BETA_VALUES = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
# HARE_ROOT and ESSAY_ROOT are defined outside this view — TODO confirm.
CONVERSATIONS_FILE = HARE_ROOT + '/datasets/LoL/heldout_conversations_anon.txt'
CONVERSATION_LENGTH = 200
NR_OF_CONVERSATIONS = 10
OUTPUT_FOLDER = ESSAY_ROOT + 'precalculated_data/'

#From all heldout conversations, load the first 10
conversations = import_conversations(
    CONVERSATIONS_FILE, cutoff_point=CONVERSATION_LENGTH)[:NR_OF_CONVERSATIONS]

#Cut away utterances by some speakers

#Save the true target data
# NOTE(review): file handle is opened inline and never closed explicitly;
# `dumps` is presumably json.dumps imported outside this view — confirm both.
open(OUTPUT_FOLDER + 'target.js', 'w').write(
    dumps(
        [conversation.speakers_with_labels for conversation in conversations]))

#Go through all detectors, with all thresholds
hares = []
for conv_hist_file, thresholds in CONVERSATION_HISTORY_FILES_WITH_THRESHOLDS.items(
):

    #For each conversation, read the status at every point during the conversation