# Imports assumed by the snippets below; dataset, Model, FLAGS, and the
# topKCandidates* helpers come from the surrounding project and are not
# shown here.
import random
from collections import Counter
from datetime import datetime

import numpy as np
import tqdm
from spacy.lang.en import English
from spacy.symbols import ORTH
from spacy.tokenizer import Tokenizer


def topKCandidatesAccuracyPlot(k, n):
    start_time = datetime.now()

    topKAccuracy = []
    x = list(range(5, 501))  # candidate values of k to score

    for session_id in dataset.get_session_ids():
        count = 0
        model = Model()

        for state, language, target_output in tqdm.tqdm(dataset.get_session_data(session_id)): 
            if count == n:  # evaluate only the first n examples per session
                break

            # Per-k success results for this (state, language, target_output) example
            k_candidate_success = topKCandidatesPlot(state, language, target_output, model)
            
            if not topKAccuracy:
                topKAccuracy = k_candidate_success
            else:
                topKAccuracy = [a + b for a, b in zip(topKAccuracy, k_candidate_success)]

            # Update model, as is done in evaluate() in evaluate.py
            model.update(state, language, target_output)
            count += 1
        break  # this snippet only evaluates the first session

    # Return the k values and accumulated success counts for plotting.
    return x, topKAccuracy
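
# Usage sketch (an assumption, not part of the original source): plot the
# accumulated counts as top-k accuracies, assuming topKCandidatesPlot returns
# one 0/1 success indicator per k in x.
def plotTopKAccuracies(n=65):
    import matplotlib.pyplot as plt
    x, counts = topKCandidatesAccuracyPlot(500, n)
    plt.plot(x, [c / n for c in counts])  # at most n examples are scored per k
    plt.xlabel('k')
    plt.ylabel('top-k accuracy')
    plt.show()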
Example 2
def evaluate_batch(data_size, test_size=500):
    results = []
    for session_id in dataset.get_session_ids():
        model = Model()
        session_data = list(dataset.get_session_data(session_id))
        assert len(session_data) > data_size + test_size
        for state, language, target_output in session_data[:data_size]:
            model.update(state, language, target_output, 0)

        for _ in range(50):  # fixed number of batch optimizer steps
            model.optimizer_step()

        print(' training accuracy: %s%%' % (100*model.training_accuracy()))

        total_correct = 0
        total_examples = 0
        for state, language, target_output in session_data[-test_size:]:
            predicted = model.predict(state, language)
            if predicted == target_output:
                total_correct += 1
            total_examples += 1

        print(' test accuracy: %s%%' % (100*total_correct/total_examples))
        results.append(total_correct/total_examples)
    print('average test accuracy: %s%%' % (100*np.mean(results)))
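
# Usage sketch (an assumption, not from the original source): train on the
# first 1000 examples of each session, then test on the last 500 (the default
# test_size); the assertion above requires sessions longer than the sum.
# evaluate_batch(1000)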

def topKCandidatesAccuracyBatched(k, n):
    start_time = datetime.now()  # elapsed time is logged per session below

    for session_id in dataset.get_session_ids():
        count = 0
        number_accurate = 0
        model = Model()

        for state, language, target_output in tqdm.tqdm(dataset.get_session_data(session_id)): 
            if count == n:
                break

            # float('inf') here means target_output was not among the model's
            # top k candidates; any other value counts as a hit.
            k_candidate_success = topKCandidatesHelper(k, state, language, target_output, model)

            if k_candidate_success != float('inf'):
                number_accurate += 1

            # Update model, as is done in evaluate() in evaluate.py
            model.update(state, language, target_output)

            count += 1
        
        print("Top K accuracy: " + str(number_accurate / count))

        with open("dataset_sessions_top_k_accuracies.txt", 'a') as f:
            f.write(str(datetime.now()-start_time) + " " + str(session_id) + " " + str(number_accurate/count) + " \n")
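
# topKCandidatesHelper is not shown in these snippets. Judging from the check
# against float('inf') above, it presumably returns the rank of target_output
# among the model's top k candidates, or float('inf') on a miss. A minimal
# sketch under that assumption; model.top_candidates is a hypothetical API.
def topKCandidatesHelper(k, state, language, target_output, model):
    candidates = model.top_candidates(state, language, k)  # hypothetical call
    for rank, candidate in enumerate(candidates, start=1):
        if candidate == target_output:
            return rank
    return float('inf')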
Example 4
    def __init__(self):
        self.vocab = []
        self.vocab_id_map = {}

        self.vocab_index = Index()
        self.feature_index = Index()

        # tokenizer
        special_cases = {
            Vocabulary.START: [{
                ORTH: Vocabulary.START
            }],
            Vocabulary.END: [{
                ORTH: Vocabulary.END
            }]
        }
        self.tokenizer = Tokenizer(English().vocab, rules=special_cases)

        self.token_count = Counter()

        for session_id in dataset.get_session_ids():
            for (_, language, _) in dataset.get_session_data(session_id):
                tokens = self.raw_tokens(language, unk=False)
                self.token_count.update(tokens)

        for token, count in self.token_count.most_common():
            if count > FLAGS.unk_threshold:
                self.vocab_index.index(token)

        feature_count = Counter()
        for session_id in dataset.get_session_ids():
            for (_, language, _) in dataset.get_session_data(session_id):
                features = self.raw_features(language)
                feature_count.update(features)

        for feature, _ in feature_count.most_common():
            self.feature_index.index(feature)

        # print("vocab index size: {}".format(self.vocab_index.size()))
        # print("feature index size: {}".format(self.feature_index.size()))

        self.vocab_index.frozen = True
        self.feature_index.frozen = True
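
# Minimal sketch of the Index class assumed above; its real implementation is
# not shown in these snippets, so the details here are assumptions.
class Index:
    def __init__(self):
        self.item_to_id = {}
        self.frozen = False

    def index(self, item):
        # Assign ids in insertion order; once frozen, unseen items get None.
        if item not in self.item_to_id:
            if self.frozen:
                return None
            self.item_to_id[item] = len(self.item_to_id)
        return self.item_to_id[item]

    def size(self):
        return len(self.item_to_id)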
Example 5
def evaluate():
    total_correct = 0
    total_examples = 0
    training_accuracies = []
    start_time = datetime.now()
    if not FLAGS.reset_model:
        model = Model()
    for session_id in dataset.get_session_ids():
        if FLAGS.filter_session is not None and session_id != FLAGS.filter_session:
            continue
        if FLAGS.reset_model:
            model = Model()
        session_correct = 0
        session_examples = 0
        session_correct_list = []

        session_data = list(dataset.get_session_data(session_id))

        if not FLAGS.verbose:
            session_data = tqdm.tqdm(session_data, ncols=80, desc=session_id)

        for example_ix, (state, language,
                         target_output) in enumerate(session_data):
            acc = session_correct / session_examples if session_examples > 0 else 0
            if FLAGS.verbose:
                print("{}: {} / {}\tacc: {:.4f}".format(
                    session_id, example_ix, len(session_data), acc))
            else:
                session_data.set_postfix({'acc': acc})

            predicted = model.predict(state, language)

            if predicted == target_output:
                session_correct += 1
                session_correct_list.append(1)
            else:
                session_correct_list.append(0)
            session_examples += 1

            model.update(state, language, target_output)
            training_accuracies.append(model.training_accuracy())

        if FLAGS.correctness_log is not None:
            with open(FLAGS.correctness_log, 'a') as f:
                f.write(' '.join(str(c) for c in session_correct_list) + '\n')

        print("this accuracy: {} {} {}".format(
            datetime.now() - start_time, session_id,
            session_correct / session_examples))
        total_correct += session_correct
        total_examples += session_examples

    print('overall accuracy: %s%%' % (100 * total_correct / total_examples))
    print('average training accuracy: %s%%' %
          (100 * np.mean(training_accuracies)))
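
# The FLAGS references in these snippets suggest absl-style flags. A hedged
# sketch of how they might be declared; the flag names come from the code
# above, but the defaults and help strings are assumptions.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_bool('reset_model', True, 'reinitialize the model for each session')
flags.DEFINE_string('filter_session', None, 'evaluate only this session id')
flags.DEFINE_bool('verbose', False, 'print per-example progress instead of a tqdm bar')
flags.DEFINE_string('correctness_log', None, 'file to append per-example 0/1 outcomes to')
flags.DEFINE_integer('unk_threshold', 1, 'minimum token count for a vocabulary entry')
flags.DEFINE_integer('limit_sessions', None, 'optional cap on the number of sessions')
flags.DEFINE_string('training', 'none', "meta-training regime: 'multi', 'multi_unmixed', 'reptile', or 'none'")
flags.DEFINE_bool('update_model_on_each_session', False, 'update the model online within each evaluation session')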
Example 6
    def __init__(self):
        self.vocab = []
        self.vocab_id_map = {}

        for session_id in dataset.get_session_ids():
            for (_, language, _) in dataset.get_session_data(session_id):
                tokens = language.split(' ')
                for token in tokens:
                    if token not in self.vocab_id_map:
                        new_id = len(self.vocab)
                        self.vocab.append(token)
                        self.vocab_id_map[token] = new_id
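
    # Usage sketch (an assumption, not part of the original snippet): look up
    # ids for a whitespace-tokenized utterance with the vocabulary built above.
    def encode(self, language):
        return [self.vocab_id_map[token] for token in language.split(' ')]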
Example 7
def evaluate():
    total_correct = 0
    total_examples = 0
    training_accuracies = []
    start_time = datetime.now()
    count = 0

    for session_id in dataset.get_session_ids():
        model = Model()
        session_correct = 0
        session_examples = 0
        session_correct_list = []
        session_data_count = 0

        for state, language, target_output in tqdm.tqdm(dataset.get_session_data(session_id)):
            print(str(count) + " : " + str(session_id) + " : " + str(session_data_count))
            predicted = model.predict(state, language)

            if predicted == target_output:
                session_correct += 1
                session_correct_list.append(1)
            else:
                session_correct_list.append(0)
            session_examples += 1
            
            model.update(state, language, target_output)
            training_accuracies.append(model.training_accuracy())
            session_data_count += 1

        if FLAGS.correctness_log is not None:
            with open(FLAGS.correctness_log, 'a') as f:
                f.write(' '.join(str(c) for c in session_correct_list) + '\n')

        count += 1
        
        with open("dataset_sessions_accuracies.txt", 'a') as f:
            f.write(str(datetime.now()-start_time) + " " + str(session_id) + " " + str(session_correct/session_examples) + " \n")
        print(datetime.now()-start_time, session_id, session_correct/session_examples)
        total_correct += session_correct
        total_examples += session_examples

    print('overall accuracy: %s%%' % (100*total_correct/total_examples))
    print('average training accuracy: %s%%' % (100*np.mean(training_accuracies)))

def evaluate_meta():
    session_ids = list(sorted(dataset.get_session_ids()))
    # don't adjust this seed, for consistency
    rng = random.Random(1)
    rng.shuffle(session_ids)
    if FLAGS.limit_sessions is not None:
        session_ids = session_ids[:FLAGS.limit_sessions]
    N_train = int(len(session_ids) * 0.8)
    N_val = int(len(session_ids) * 0.1)
    N_test = len(session_ids) - N_train - N_val
    train_session_ids = session_ids[:N_train]
    val_session_ids = session_ids[N_train:N_train + N_val]
    test_session_ids = session_ids[-N_test:]
    print(f"{len(train_session_ids)} train sessions")
    print(f"{len(val_session_ids)} val sessions")
    print(f"{len(test_session_ids)} test sessions")
    assert not (set(train_session_ids)
                & set(test_session_ids)), "overlap between train and test!"
    assert not (set(val_session_ids)
                & set(test_session_ids)), "overlap between val and test!"
    assert not (set(val_session_ids)
                & set(train_session_ids)), "overlap between train and val!"

    model = Model()

    if FLAGS.training == 'multi':
        model = train_multi(model, train_session_ids, val_session_ids)
    elif FLAGS.training == 'multi_unmixed':
        model = train_unmixed(model,
                              train_session_ids,
                              val_session_ids,
                              updates='multi')
    elif FLAGS.training == 'reptile':
        # reptile does update on each session; ensure training matches test
        assert FLAGS.update_model_on_each_session
        model = train_unmixed(model,
                              train_session_ids,
                              val_session_ids,
                              updates='reptile')
    elif FLAGS.training == 'none':
        pass

    val_stats = test_sessions(model, val_session_ids, name='val')
    test_stats = test_sessions(model, test_session_ids, name='test')
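
# test_sessions is not shown in these snippets. A hedged sketch modeled on the
# online evaluate() loops above: adapt a copy of the trained model to each
# held-out session while measuring prediction accuracy. Everything here is an
# assumption about the real implementation.
def test_sessions(model, session_ids, name=''):
    import copy
    total_correct = 0
    total_examples = 0
    for session_id in session_ids:
        session_model = copy.deepcopy(model)  # per-session adaptation copy
        for state, language, target_output in dataset.get_session_data(session_id):
            if session_model.predict(state, language) == target_output:
                total_correct += 1
            total_examples += 1
            session_model.update(state, language, target_output)
    accuracy = total_correct / total_examples
    print('%s accuracy: %s%%' % (name, 100 * accuracy))
    return {'name': name, 'accuracy': accuracy}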