# Project-level names used throughout (Model, dataset, FLAGS, Index, Vocabulary,
# topKCandidatesPlot, topKCandidatesHelper, train_multi, train_unmixed,
# test_sessions) are defined elsewhere in the repo.
from collections import Counter
from datetime import datetime
import random

import numpy as np
import tqdm
from spacy.lang.en import English
from spacy.symbols import ORTH
from spacy.tokenizer import Tokenizer


def topKCandidatesAccuracyPlot(k, n):
    start_time = datetime.now()
    topKAccuracy = []
    x = list(range(5, 501))  # candidate-list sizes for the x-axis of the plot
    for session_id in dataset.get_session_ids():
        count = 0
        model = Model()
        for state, language, target_output in tqdm.tqdm(dataset.get_session_data(session_id)):
            if count == n:  # was a hard-coded 65, which ignored the n parameter
                break
            tuple_state = tuple(tuple(row) for row in state)
            tuple_target_output = tuple(tuple(row) for row in target_output)
            # Hashable key for this (state, language, target output);
            # built but not used further in this version.
            tup = (tuple_state, language, tuple_target_output)
            # Per-k success indicators for this example
            k_candidate_success = topKCandidatesPlot(state, language, target_output, model)
            if not topKAccuracy:  # was `if topKAccuracy = []:`, a syntax error
                topKAccuracy = k_candidate_success
            else:
                # accumulate elementwise (renamed from x, y to avoid shadowing x above)
                topKAccuracy = [a + b for a, b in zip(topKAccuracy, k_candidate_success)]
            # Update model, as is done in evaluate() in evaluate.py
            model.update(state, language, target_output)
            count += 1
        break  # only the first session is processed for this plot

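# A minimal plotting sketch for the counts accumulated above, assuming
# topKAccuracy[i] holds the number of examples whose target fell within the
# top x[i] candidates and that `count` examples were processed in total.
# matplotlib and the names below are illustrative, not part of the repo.
import matplotlib.pyplot as plt


def plot_top_k_accuracy(x, topKAccuracy, count, out_path="top_k_accuracy.png"):
    accuracies = [s / count for s in topKAccuracy]  # convert counts to rates
    plt.figure()
    plt.plot(x, accuracies)
    plt.xlabel("k (candidate list size)")
    plt.ylabel("top-k accuracy")
    plt.title("Top-k accuracy vs. candidate list size")
    plt.savefig(out_path)
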
def evaluate_batch(data_size, test_size=500):
    results = []
    for session_id in dataset.get_session_ids():
        model = Model()
        session_data = list(dataset.get_session_data(session_id))
        assert len(session_data) > data_size + test_size
        for state, language, target_output in session_data[:data_size]:
            model.update(state, language, target_output, 0)
        for _ in range(50):
            model.optimizer_step()
        print(' training accuracy: %s%%' % (100 * model.training_accuracy()))
        total_correct = 0
        total_examples = 0
        for state, language, target_output in session_data[-test_size:]:
            predicted = model.predict(state, language)
            if predicted == target_output:
                total_correct += 1
            total_examples += 1
        print(' test accuracy: %s%%' % (100 * total_correct / total_examples))
        results.append(total_correct / total_examples)
    print('average test accuracy: %s%%' % (100 * np.mean(results)))

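# The evaluation routines in this file all assume a Model with the interface
# sketched here. This Protocol is inferred from the call sites
# (predict/update/optimizer_step/training_accuracy) and is illustrative only;
# the real class lives elsewhere in the repo.
from typing import Any, Protocol


class ModelInterface(Protocol):
    def predict(self, state: Any, language: str) -> Any:
        """Return the predicted target output for a state/utterance pair."""
        ...

    def update(self, state: Any, language: str, target_output: Any, *args: Any) -> None:
        """Record a training example (some call sites pass an extra flag)."""
        ...

    def optimizer_step(self) -> None:
        """Run one optimization step over the recorded examples."""
        ...

    def training_accuracy(self) -> float:
        """Accuracy of the model on its own training examples."""
        ...
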
def topKCandidatesAccuracyBatched(k, n):
    start_time = datetime.now()  # was missing, causing a NameError in the log write below
    # sessions will have key-value pairs of session_id, session_data
    # sessions = dict()
    for session_id in dataset.get_session_ids():
        count = 0
        number_accurate = 0
        model = Model()
        for state, language, target_output in tqdm.tqdm(dataset.get_session_data(session_id)):
            if count == n:
                break
            tuple_state = tuple(tuple(row) for row in state)
            tuple_target_output = tuple(tuple(row) for row in target_output)
            # Hashable key for this (state, language, target output);
            # built but not used further in this version.
            tup = (tuple_state, language, tuple_target_output)
            # A return of float('inf') marks a miss (target not in the top k)
            k_candidate_success = topKCandidatesHelper(k, state, language, target_output, model)
            if k_candidate_success != float('inf'):
                number_accurate += 1
            # Update model, as is done in evaluate() in evaluate.py
            model.update(state, language, target_output)
            count += 1
        print("Top K accuracy: " + str(number_accurate / count))
        with open("dataset_sessions_top_k_accuracies.txt", 'a') as f:
            f.write(str(datetime.now() - start_time) + " " + str(session_id) + " "
                    + str(number_accurate / count) + " \n")

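# topKCandidatesHelper is not shown in this excerpt. A minimal sketch of the
# contract implied by the call above: return the 1-based rank of the target
# among the model's scored candidates, or float('inf') when it is outside the
# top k. The candidate_outputs accessor is hypothetical; substitute whatever
# ranked-candidate method the real Model provides.
def topKCandidatesHelperSketch(k, state, language, target_output, model):
    candidates = model.candidate_outputs(state, language)  # hypothetical accessor
    for rank, candidate in enumerate(candidates[:k], start=1):
        if candidate == target_output:
            return rank
    return float('inf')
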
def __init__(self):
    self.vocab = []
    self.vocab_id_map = {}
    self.vocab_index = Index()
    self.feature_index = Index()

    # tokenizer
    special_cases = {
        Vocabulary.START: [{ORTH: Vocabulary.START}],
        Vocabulary.END: [{ORTH: Vocabulary.END}],
    }
    self.tokenizer = Tokenizer(English().vocab, rules=special_cases)

    # index every token whose corpus count clears the unk threshold
    self.token_count = Counter()
    for session_id in dataset.get_session_ids():
        for (_, language, _) in dataset.get_session_data(session_id):
            tokens = self.raw_tokens(language, unk=False)
            self.token_count.update(tokens)
    for token, count in self.token_count.most_common():
        if count > FLAGS.unk_threshold:
            self.vocab_index.index(token)

    # index all features, most frequent first
    feature_count = Counter()
    for session_id in dataset.get_session_ids():
        for (_, language, _) in dataset.get_session_data(session_id):
            # tokens = self.raw_tokens(language)
            # for token in tokens:
            #     self.vocab_index.index(token)
            features = self.raw_features(language)
            feature_count.update(features)
    for feature, count in feature_count.most_common():
        self.feature_index.index(feature)

    # print("vocab index size: {}".format(self.vocab_index.size()))
    # print("feature index size: {}".format(self.feature_index.size()))
    self.vocab_index.frozen = True
    self.feature_index.frozen = True

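# The Index class used above is assumed to be a simple append-only
# string-to-id map with a freeze switch; this sketch is inferred from the
# index()/size() calls and the `frozen` attribute, not copied from the repo.
class IndexSketch:
    def __init__(self):
        self.id_map = {}
        self.items = []
        self.frozen = False  # once True, unseen items are no longer added

    def index(self, item):
        if item not in self.id_map:
            if self.frozen:
                return None  # or an UNK id, depending on the real implementation
            self.id_map[item] = len(self.items)
            self.items.append(item)
        return self.id_map[item]

    def size(self):
        return len(self.items)
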
def evaluate():
    total_correct = 0
    total_examples = 0
    training_accuracies = []
    start_time = datetime.now()
    if not FLAGS.reset_model:
        model = Model()
    for session_id in dataset.get_session_ids():
        if FLAGS.filter_session is not None and session_id != FLAGS.filter_session:
            continue
        if FLAGS.reset_model:
            model = Model()
        session_correct = 0
        session_examples = 0
        session_correct_list = []
        session_data = list(dataset.get_session_data(session_id))
        if not FLAGS.verbose:
            session_data = tqdm.tqdm(session_data, ncols=80, desc=session_id)
        for example_ix, (state, language, target_output) in enumerate(session_data):
            acc = session_correct / session_examples if session_examples > 0 else 0
            if FLAGS.verbose:
                print("{}: {} / {}\tacc: {:.4f}".format(
                    session_id, example_ix, len(session_data), acc))
            else:
                session_data.set_postfix({'acc': acc})
            predicted = model.predict(state, language)
            if predicted == target_output:
                session_correct += 1
                session_correct_list.append(1)
            else:
                session_correct_list.append(0)
            session_examples += 1
            model.update(state, language, target_output)
            training_accuracies.append(model.training_accuracy())
            # if session_examples > 2:
            #     return
        if FLAGS.correctness_log is not None:
            with open(FLAGS.correctness_log, 'a') as f:
                f.write(' '.join(str(c) for c in session_correct_list) + '\n')
        print("this accuracy: {} {} {}".format(
            datetime.now() - start_time, session_id, session_correct / session_examples))
        total_correct += session_correct
        total_examples += session_examples
    print('overall accuracy: %s%%' % (100 * total_correct / total_examples))
    print('average training accuracy: %s%%' % (100 * np.mean(training_accuracies)))

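# The FLAGS object referenced throughout is assumed to come from absl (or a
# compatible flags library); this sketch reconstructs the definitions implied
# by the reads above. Defaults here are guesses, not the repo's actual values.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_boolean('reset_model', True, 'Re-initialize the model for each session.')
flags.DEFINE_string('filter_session', None, 'If set, evaluate only this session id.')
flags.DEFINE_boolean('verbose', False, 'Print per-example progress instead of a tqdm bar.')
flags.DEFINE_string('correctness_log', None, 'File to append per-example 0/1 correctness to.')
flags.DEFINE_integer('unk_threshold', 1, 'Minimum token count to enter the vocabulary.')
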
def __init__(self):
    self.vocab = []
    self.vocab_id_map = {}
    for session_id in dataset.get_session_ids():
        for (_, language, _) in dataset.get_session_data(session_id):
            tokens = language.split(' ')
            for token in tokens:
                if token not in self.vocab_id_map:
                    new_id = len(self.vocab)
                    self.vocab.append(token)
                    self.vocab_id_map[token] = new_id

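# A hypothetical convenience method for the simple vocabulary above, showing
# how vocab_id_map would typically be consumed; not present in the repo.
def ids_for(self, language, unk_id=-1):
    # map each whitespace token to its id, with a sentinel for unseen tokens
    return [self.vocab_id_map.get(token, unk_id) for token in language.split(' ')]
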
def evaluate():
    total_correct = 0
    total_examples = 0
    training_accuracies = []
    start_time = datetime.now()
    count = 0
    for session_id in dataset.get_session_ids():
        model = Model()
        session_correct = 0
        session_examples = 0
        session_correct_list = []
        session_data_count = 0
        for state, language, target_output in tqdm.tqdm(dataset.get_session_data(session_id)):
            print(str(count) + " : " + str(session_id) + " : " + str(session_data_count))
            # print(state)
            # print(language)
            predicted = model.predict(state, language)
            # print(predicted)
            # print(target_output)
            # print()
            if predicted == target_output:
                session_correct += 1
                session_correct_list.append(1)
            else:
                session_correct_list.append(0)
            session_examples += 1
            model.update(state, language, target_output)
            training_accuracies.append(model.training_accuracy())
            session_data_count += 1
        if FLAGS.correctness_log is not None:
            with open(FLAGS.correctness_log, 'a') as f:
                f.write(' '.join(str(c) for c in session_correct_list) + '\n')
        count += 1
        with open("dataset_sessions_accuracies.txt", 'a') as f:
            f.write(str(datetime.now() - start_time) + " " + str(session_id) + " "
                    + str(session_correct / session_examples) + " \n")
        print(datetime.now() - start_time, session_id, session_correct / session_examples)
        total_correct += session_correct
        total_examples += session_examples
    print('overall accuracy: %s%%' % (100 * total_correct / total_examples))
    print('average training accuracy: %s%%' % (100 * np.mean(training_accuracies)))

def evaluate_meta():
    session_ids = list(sorted(dataset.get_session_ids()))
    # don't adjust this seed, for consistency
    rng = random.Random(1)
    rng.shuffle(session_ids)
    if FLAGS.limit_sessions is not None:
        session_ids = session_ids[:FLAGS.limit_sessions]
    # 80/10/10 train/val/test split over sessions
    N_train = int(len(session_ids) * 0.8)
    N_val = int(len(session_ids) * 0.1)
    N_test = len(session_ids) - N_train - N_val
    train_session_ids = session_ids[:N_train]
    val_session_ids = session_ids[N_train:N_train + N_val]
    test_session_ids = session_ids[-N_test:]
    print(f"{len(train_session_ids)} train sessions")
    print(f"{len(val_session_ids)} val sessions")
    print(f"{len(test_session_ids)} test sessions")
    assert not (set(train_session_ids) & set(test_session_ids)), "overlap between train and test!"
    assert not (set(val_session_ids) & set(test_session_ids)), "overlap between val and test!"
    assert not (set(val_session_ids) & set(train_session_ids)), "overlap between train and val!"
    model = Model()
    if FLAGS.training == 'multi':
        model = train_multi(model, train_session_ids, val_session_ids)
    elif FLAGS.training == 'multi_unmixed':
        model = train_unmixed(model, train_session_ids, val_session_ids, updates='multi')
    elif FLAGS.training == 'reptile':
        # reptile does update on each session; ensure training matches test
        assert FLAGS.update_model_on_each_session
        model = train_unmixed(model, train_session_ids, val_session_ids, updates='reptile')
    elif FLAGS.training == 'none':
        pass
    val_stats = test_sessions(model, val_session_ids, name='val')
    test_stats = test_sessions(model, test_session_ids, name='test')

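# train_unmixed(..., updates='reptile') is defined elsewhere. For reference,
# the core Reptile outer update interpolates the meta-parameters toward the
# weights reached after adapting to one session; a minimal sketch assuming
# PyTorch-style parameter tensors (illustrative, not the repo's code).
import torch


def reptile_outer_update(meta_params, adapted_params, meta_lr=0.1):
    # theta <- theta + meta_lr * (theta_adapted - theta)
    with torch.no_grad():
        for meta_p, adapted_p in zip(meta_params, adapted_params):
            meta_p.add_(meta_lr * (adapted_p - meta_p))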