Example #1

from os import listdir

# Assumption: load_pretrained is exported by the hare package alongside Hare;
# the import lines are cut off in the source.
from hare import Hare, load_pretrained
from hare.brain import DictBasedBrain
from hare.conversation import import_conversations

# NOTE: the top of this example is truncated in the source. The constants and
# the detector -> model-file mapping below are placeholder reconstructions so
# that the snippet parses; the original values are not shown on this page.
RESULT_LOCATION = 'results/'                # placeholder
PRETRAINED_MODELS_LOCATION = 'pretrained/'  # placeholder
CONVERSATIONS_FILE = '../datasets/LoL/heldout_conversations_anon.txt'  # assumed, as in Example #2

all_detectors = {
    # 'detector name': 'pretrained model file' entries, truncated in the source
}
conversations = []

# Keep claiming detectors until every one has a result file
while True:

    all_result_files = set(listdir(RESULT_LOCATION))
    todo = set(all_detectors.keys()) - all_result_files

    if len(todo) == 0:
        print('There are result files for all detectors. Stopping')
        break

    print('Detectors without result files:', ', '.join(todo))
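    # Pick an arbitrary detector that still needs a result file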
    detector = list(todo)[0]

    print('Doing', detector)

    if len(conversations) == 0:
        print('Importing conversations')
        conversations = import_conversations(CONVERSATIONS_FILE)

    print('Loading pretrained model')
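    # 'm04' is the dictionary-based detector; every other detector is restored
    # from a pretrained model file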
    if detector in ['m04']:
        exp_hare = Hare(name='dict_based')
        exp_hare.brain = DictBasedBrain()
        exp_hare.brain.bad_words = [
            'f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck', 'noob',
            'newb', 'n00b', 'f*g', 'loser'
        ]
    else:
        exp_hare = load_pretrained(PRETRAINED_MODELS_LOCATION +
                                   all_detectors[detector])

    result_file = open(RESULT_LOCATION + detector, 'w')
    # ... (the rest of the loop body is truncated in the source)

Example #2

from hare import Hare
from hare.brain import DictBasedBrain
from hare.conversation import import_conversations

THRESHOLDS = [0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75]  # not used further in this snippet
CONVERSATIONS_FILE = '../datasets/LoL/heldout_conversations_anon.txt'
NR_OF_CONVERSATIONS = 1000

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)[:NR_OF_CONVERSATIONS]

print('Setting up dictionary-based detector')

exp_hare = Hare(name='dict_based')
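# A dictionary-based brain scores speakers on occurrences of the bad words
# listed below (no pretrained model needed)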
exp_hare.brain = DictBasedBrain()
exp_hare.brain.bad_words = [
    'f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck', 'noob', 'newb',
    'n00b', 'f*g', 'loser'
]

# Use a context manager so the result file is closed once all
# conversations have been processed
with open('moba_dic', 'w') as result_file:

    for n, conversation in enumerate(conversations):

        print('Processing conv', n)
        exp_hare.add_conversation(conversation)
        exp_hare.save_history_for_conversation(result_file, n)

Example #3

from json import dumps

from hare.conversation import import_conversations

# NOTE: the top of this example is truncated in the source. The imports are
# restored, HARE_ROOT and ESSAY_ROOT are placeholder paths, and all but the
# last entry of the threshold mapping were cut off.
HARE_ROOT = '..'   # placeholder
ESSAY_ROOT = './'  # placeholder

CONVERSATION_HISTORY_FILES_WITH_THRESHOLDS = {
    # ... earlier 'conversation history file': [thresholds] entries,
    # truncated in the source ...
    'moba_bigru_embeddings_100': [
        0.001, 0.0025, 0.005, 0.0075, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5,
        0.75, 1
    ]
}

BETA_VALUES = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

CONVERSATIONS_FILE = HARE_ROOT + '/datasets/LoL/heldout_conversations_anon.txt'
CONVERSATION_LENGTH = 200
NR_OF_CONVERSATIONS = 10

OUTPUT_FOLDER = ESSAY_ROOT + 'precalculated_data/'

# From all heldout conversations, load the first NR_OF_CONVERSATIONS, cutting
# each off after CONVERSATION_LENGTH utterances
conversations = import_conversations(
    CONVERSATIONS_FILE, cutoff_point=CONVERSATION_LENGTH)[:NR_OF_CONVERSATIONS]

# Cut away utterances by some speakers
# (the code for this step is truncated in the source)

# Save the true target data as JSON
with open(OUTPUT_FOLDER + 'target.js', 'w') as target_file:
    target_file.write(
        dumps([conversation.speakers_with_labels
               for conversation in conversations]))

# Go through all detectors, with all thresholds
hares = []

for conv_hist_file, thresholds in CONVERSATION_HISTORY_FILES_WITH_THRESHOLDS.items():

    # For each conversation, read the status at every point during the conversation
    # ... (the rest of the loop body is truncated in the source)