# Example #1
# Train a mock toxicity detector on synthetic, identical conversations.
from hare import Hare, Conversation
from hare.tensorflowbrain import BiGruBrain

mockhare = Hare()
mockhare.brain = BiGruBrain()

# Build 10,000 four-utterance conversations; speaker 'b' is labelled toxic.
for _ in range(10000):
    convo = Conversation()
    for speaker, content in (('a', 'c c c c c'),
                             ('b', 'c c c c c'),
                             ('b', 'c c c c b'),
                             ('a', 'c c c c a')):
        convo.add_utterance(speaker=speaker, content=content)
    convo.label_speaker('b', 1)
    mockhare.add_conversation(convo)

mockhare.train()
mockhare.visualize_history_for_conversation()
#Add the precalculated conversation history to hare objects
hares = []

for conv_hist_file in CONVERSATION_HISTORY_FILES:

    status_per_conversation = []

    # Read one JSON-encoded status list per line, truncated to the first
    # CONVERSATION_LENGTH entries. The context manager closes the handle;
    # the original left it open.
    with open(conv_hist_file) as hist_file:
        for n, line in enumerate(hist_file):
            status_per_conversation.append(loads(line)[:CONVERSATION_LENGTH])

            if n % 100 == 0:
                print(conv_hist_file, n)

    for threshold in THRESHOLDS:
        # Include the source file in the detector name so detectors built
        # from different history files do not collide (matches the naming
        # convention used by the parallel block elsewhere in this file).
        h = Hare(name=conv_hist_file + ':' + str(threshold))
        h.status_per_conversation = status_per_conversation
        h.cut_off_value = threshold

        hares.append(h)

#Load the conversations
conversations = []
current_conversation = Conversation()

# NOTE(review): this parsing loop is truncated here — the branches that
# handle '#' marker lines and utterances appear to continue elsewhere in
# the file.
for line in open(CONVERSATIONS_FILE):

    line = line.strip()

    # Skip blank separator lines between conversations.
    if len(line) == 0:
        continue
# Example #3
# Train a minimal GloVe-backed detector on a single two-utterance
# conversation, save it, and visualise its status history.
from hare import Hare, Conversation
from hare.brain import BiGruBrain

hare = Hare()

brain = BiGruBrain()
brain.embedding_location = '/vol/bigdata/word_embeddings/glove/glove.6B.50d.txt'
brain.verbose = True
hare.brain = brain

# One conversation; speaker 'a' is labelled toxic.
convo = Conversation()
for speaker, content in (('a', 'hate you'), ('b', 'i love you')):
    convo.add_utterance(speaker=speaker, content=content)
convo.label_speaker('a', 1)
hare.add_conversation(convo)

hare.train()
hare.save('/vol/tensusers2/wstoop/HaRe/hare/pretrained/simple')

hare.update_status_history_for_conversation()
hare.visualize_history_for_conversation()
# Example #4
    # NOTE(review): fragment — the matching 'if' branch for this 'elif' is
    # not visible in this excerpt.
    elif line[0] == '#':
        # A '#' line closes the current conversation; its second token
        # names the speaker labelled toxic (score 1).
        try:
            current_conversation.label_speaker(line.split()[1], 1)
        except IndexError:
            continue

        conversations.append(current_conversation)
        current_conversation = Conversation()

        # Progress report every 100 parsed conversations.
        if len(conversations) % 100 == 0:
            print(len(conversations))

        # Stop once the requested number of conversations is parsed.
        if len(conversations) == NR_OF_CONVERSATIONS:
            break

        continue

    # Regular utterance lines are tab-separated: speaker<TAB>content.
    speaker, content = line.split('\t')
    current_conversation.add_utterance(speaker, content)

#Wrap the parsed conversations in a Hare detector and train it.
moba_hare = Hare()
for convo in conversations:
    moba_hare.add_conversation(convo)

# Configure the brain before training; downsampling balances the classes.
moba_brain = BiGruBrain()
moba_brain.downsampling = True
moba_brain._max_sequence_length = 500  # cap sequences at 500 tokens
moba_hare.brain = moba_brain

moba_hare.train()
moba_hare.save('moba')
from sys import argv

from hare import Hare
from hare.brain import DictBasedBrain
from hare.conversation import import_conversations

THRESHOLDS = [0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75]
CONVERSATIONS_FILE = '../datasets/LoL/heldout_conversations_anon.txt'
NR_OF_CONVERSATIONS = 1000

print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)[:NR_OF_CONVERSATIONS]

print('Loading pretrained model')

# Dictionary-based baseline: flags utterances containing any of these words.
exp_hare = Hare(name='dict_based')
exp_hare.brain = DictBasedBrain()
exp_hare.brain.bad_words = [
    'f**k', 'fck', 'fuk', 'shit', 'stfu', 'wtf', 'suck', 'noob', 'newb',
    'n00b', 'f*g', 'loser'
]

# Use a context manager so the result file is flushed and closed even if
# processing fails (the original never closed it).
with open('moba_dic', 'w') as result_file:
    for n, conversation in enumerate(conversations):
        print('Processing conv', n)
        exp_hare.add_conversation(conversation)
        exp_hare.save_history_for_conversation(result_file, n)
# Example #6
#Add the precalculated conversation history to hare objects
hares = []

for conv_hist_file in CONVERSATION_HISTORY_FILES:

    status_per_conversation = []

    # Read one JSON-encoded status list per line, truncated to the first
    # CONVERSATION_LENGTH entries. The context manager closes the handle;
    # the original left it open.
    with open(conv_hist_file) as hist_file:
        for n, line in enumerate(hist_file):
            status_per_conversation.append(loads(line)[:CONVERSATION_LENGTH])

            if n % 100 == 0:
                print(conv_hist_file, n)

    # One detector per threshold; all share the same precomputed history.
    for threshold in THRESHOLDS:
        h = Hare(name=conv_hist_file + ':' + str(threshold))
        h.status_per_conversation = status_per_conversation
        h.cut_off_value = threshold

        hares.append(h)

#Load the conversations
conversations = []
current_conversation = Conversation()

# NOTE(review): this parsing loop is truncated here — the '#' marker and
# utterance branches appear to continue elsewhere in the file.
for line in open(CONVERSATIONS_FILE):

    line = line.strip()

    # Skip blank separator lines between conversations.
    if len(line) == 0:
        continue
# Example #7
from hare import Hare, Conversation
from hare.tensorflowbrain import LSTMBrain, BiGruBrain
from hare.conversation import import_conversations

from hare.embedding import load_embedding_dictionary

#Load the conversations from the LoL training split.
DATA_ROOT = '../datasets/LoL/'
CONVERSATIONS_FILE = DATA_ROOT + 'train_conversations_anon.txt'
print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)

#Register every conversation with a fresh detector.
moba_hare = Hare()
for convo in conversations:
    moba_hare.add_conversation(convo)

# Configure the recurrent brain; attribute assignments are order-independent.
brain = BiGruBrain()
brain.verbose = True
brain.downsampling = True
brain.embedding_location = DATA_ROOT + 'train_toxic_embeddings'
brain.learning_epochs = 10
brain.include_casing_information = True
brain._max_sequence_length = 500  # cap sequences at 500 tokens

moba_hare.brain = brain
moba_hare.train()
moba_hare.save('m05')
# Example #8
    # TODO: handle this with conversation.remove_speaker()

    #Create a hare object for each threshold
    for threshold in thresholds:

        # Detector name encodes both the history file and the threshold.
        detector_name = conv_hist_file + '@' + str(threshold)
        folder_name = OUTPUT_FOLDER + detector_name + '/'

        # Start from an empty output folder for this detector.
        try:
            rmtree(folder_name)
        except FileNotFoundError:
            pass

        mkdir(folder_name)

        h = Hare(name=detector_name)
        h.status_per_conversation = status_per_conversation
        h.cut_off_value = threshold

        for conversation in conversations:
            h.add_conversation(conversation)
            # NOTE(review): this appends the same detector once per
            # conversation; the parallel block elsewhere appends once per
            # threshold — confirm whether this append belongs outside the
            # loop.
            hares.append(h)

        # Per-detector metric accumulators.
        per_player = []
        tp = []
        fp = []
        fbeta = {b: [] for b in BETA_VALUES}

        #Calculate metrics for this detector/threshold combi
        # NOTE(review): fragment — the body of this loop continues beyond
        # this excerpt.
        for utterance_index in range(CONVERSATION_LENGTH):
# Example #9
        # NOTE(review): fragment — the enclosing branch that labels and
        # appends the finished conversation starts above this excerpt.
        current_conversation = Conversation()

        # Progress report every 100 parsed conversations.
        if len(conversations) % 100 == 0:
            print(len(conversations))

        # Stop once the requested number of conversations is parsed.
        if len(conversations) == NR_OF_CONVERSATIONS:
            break

        continue

    # Utterance lines are tab-separated: speaker<TAB>content.
    speaker, content = line.split('\t')
    current_conversation.add_utterance(speaker, content)

#Train one detector per (downsample ratio, training size) combination.
for downsample_ratio in DOWNSAMPLE_RATIOS:
    for training_size in TRAINING_SIZES:

        print('===', 'training', downsample_ratio, training_size, '===')

        # Only the first `training_size` conversations are used.
        exp_hare = Hare()
        for convo in conversations[:training_size]:
            exp_hare.add_conversation(convo)

        # Configure the brain, then attach it to the detector.
        brain = BiGruBrain()
        brain.downsampling = True
        brain.downsampling_ratio = downsample_ratio
        brain._max_sequence_length = 500  # cap sequences at 500 tokens
        exp_hare.brain = brain

        exp_hare.train()
        exp_hare.save('moba_' + str(downsample_ratio) + '_' + str(training_size))
# Example #10
# Smoke-test the RandomBrain: score a tiny conversation and report the
# retrospective accuracy.
from hare import Hare, Conversation
from hare.brain import RandomBrain

random_hare = Hare()
random_hare.brain = RandomBrain()

# Register the conversation first; utterances added afterwards are tracked.
convo = Conversation()
random_hare.add_conversation(convo)

for speaker, content in (('a', 'hello'),
                         ('b', 'hello'),
                         ('a', 'how is everyone doing?')):
    convo.add_utterance(speaker=speaker, content=content)

random_hare.update_status_history_for_conversation(0)
print(random_hare.status_per_conversation[0])

# Label speaker 'b' and compare against the detector's scoring history.
convo.label_speaker('b', 0.9)
acc = random_hare.calculate_retrospective_accuracy()

print(acc)
# For each conversation, compute per-detector/per-threshold F-scores at a
# fixed set of points in the conversation.
for convo_index, conversation in enumerate(conversations):

    #For this conversation, we want to know the conversation history for every detector
    for detector, thresholds in DETECTOR_THRESHOLDS.items():
        # Each detector's history file holds one JSON line per conversation,
        # read in lockstep with the conversations list.
        conversation_history = loads(open_conversation_history_files[detector].readline())

        scores_for_this_conversation = []

        #We check what scores this means for every threshold
        for threshold in thresholds:

            if threshold not in all_scores[detector].keys():
                all_scores[detector][threshold] = []

            # Throwaway Hare wrapping the precomputed history for scoring.
            h = Hare()
            h.add_conversation(conversation)
            h.status_per_conversation = [conversation_history]
            h.cut_off_value = threshold

            for point_in_conversation in INTERESTING_POINTS_IN_CONVERSATION:
                true, predicted = h.get_true_and_predicted_scores_at_utterance_index(point_in_conversation,categorize_predicted_scores=True)
                # NOTE(review): if fbeta_score is scikit-learn's, recent
                # versions require beta as a keyword argument (beta=1) —
                # confirm which implementation is imported.
                score = fbeta_score(true, predicted, 1)

                scores_for_this_conversation.append(score)

                # Build the header row only while processing the first
                # conversation.
                if convo_index == 0:
                    headers.append(detector+':'+str(threshold)+':'+str(point_in_conversation))

            # Keep only the scores produced for this threshold's points.
            all_scores[detector][threshold].append(scores_for_this_conversation[-len(INTERESTING_POINTS_IN_CONVERSATION):])
CONVERSATION_LENGTH = 200

#Add the precalculated conversation history to hare objects
hares = []

for conv_hist_file in CONVERSATION_HISTORY_FILES:

    status_per_conversation = []

    # Read one JSON-encoded status list per line, truncated to the first
    # CONVERSATION_LENGTH entries. The context manager closes the handle;
    # the original left it open.
    with open(conv_hist_file) as hist_file:
        for n, line in enumerate(hist_file):
            status_per_conversation.append(loads(line)[:CONVERSATION_LENGTH])

            if n % 100 == 0:
                print(conv_hist_file, n)

    # One detector per history file, named after the file.
    h = Hare(name=conv_hist_file)
    h.status_per_conversation = status_per_conversation

    hares.append(h)

#Load the conversations
conversations = []
current_conversation = Conversation()

# NOTE(review): this parsing loop is truncated — the body of the 'elif'
# below continues beyond this excerpt.
for line in open(CONVERSATIONS_FILE):

    line = line.strip()

    # Skip blank separator lines between conversations.
    if len(line) == 0:
        continue
    elif line[0] == '#':
# Example #13
from hare import Hare, Conversation
from hare.tensorflowbrain import LSTMBrain, BiGruBrain
from hare.conversation import import_conversations

from hare.embedding import load_embedding_dictionary

#Load the LoL training conversations.
CONVERSATIONS_FILE = 'datasets/LoL/train_conversations_anon.txt'
print('Importing conversations')
conversations = import_conversations(CONVERSATIONS_FILE)

#Register every conversation with a fresh detector.
moba_hare = Hare()
for convo in conversations:
    moba_hare.add_conversation(convo)

# Configure the BiGRU brain, then attach it to the detector.
bigru = BiGruBrain()
bigru.embedding_location = 'datasets/LoL/train_toxic_embeddings'
bigru.verbose = True
bigru.downsampling = True
bigru.learning_epochs = 10
bigru._max_sequence_length = 500  # cap sequences at 500 tokens
moba_hare.brain = bigru

moba_hare.train()
moba_hare.save('moba_bigru_embedding')
# Example #14
# File: test.py  Project: bjmiller16/HaRe
# Visualise retrospective precision/recall for a dictionary-based detector
# on the bundled example conversations.
from hare import load_pretrained, load_example_conversations
from hare.visualize import visualize_retrospective_precision_and_recall

from hare import Hare
from hare.brain import DictBasedBrain

moba_hare = Hare()
moba_hare.brain = DictBasedBrain()

for convo in load_example_conversations():
    moba_hare.add_conversation(convo)

# NOTE(review): THRESHOLDS is never read in this snippet — confirm whether
# the visualisation is meant to use it.
THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

visualize_retrospective_precision_and_recall(moba_hare)