import pickle
import random
import sys
from random import shuffle

import matplotlib.pyplot as plt
import pandas
import tensorflow as tf

# The following names are assumed to be defined elsewhere in this repo (exact
# module paths unknown): IBM2, RNNClassifier, NNClassifier, get_tokens,
# convert, rf_map, and the module-level constants FOLDS, levels, nl_format,
# ml_format, and commands_format.


def run_cross_val(data, out):
    with open(data, 'r') as f:
        pc, commands = pickle.load(f)
    all_commands = commands['L0'] + commands['L1']

    # Compute fold boundaries for each level's parallel corpus
    l0_len, l1_len, l2_len = len(pc['L0']), len(pc['L1']), len(pc['L2'])
    l0_range = range(0, l0_len, l0_len / FOLDS)
    l1_range = range(0, l1_len, l1_len / FOLDS)
    l2_range = range(0, l2_len, l2_len / FOLDS)

    # Assumes FOLDS = 10 with corpus sizes not divisible by FOLDS, so each
    # range has 11 boundary points and l?_range[i + 1] is valid for every fold
    assert len(l0_range) == len(l1_range) == len(l2_range) == 11

    reward_selection = []
    for i in range(FOLDS):
        # Fix the shuffle order per fold for reproducibility
        random.seed(21)

        # Hold out the i-th slice of each level as the validation set
        val = {'L0': pc['L0'][l0_range[i]:l0_range[i + 1]],
               'L1': pc['L1'][l1_range[i]:l1_range[i + 1]],
               'L2': pc['L2'][l2_range[i]:l2_range[i + 1]]}
        l0_train = pc['L0'][:l0_range[i]] + pc['L0'][l0_range[i + 1]:]
        l1_train = pc['L1'][:l1_range[i]] + pc['L1'][l1_range[i + 1]:]
        l2_train = pc['L2'][:l2_range[i]] + pc['L2'][l2_range[i + 1]:]

        # Train a single joint model on the pooled training data
        joint_dataset = l0_train + l1_train + l2_train
        random.shuffle(joint_dataset)
        joint_ibm2 = IBM2(joint_dataset, 15)

        correct, total = 0, 0
        for lvl in levels:
            for (example_en, example_ml) in val[lvl]:
                # Score Translations: keep the highest-scoring candidate command
                best_trans, best_score = None, 0.0
                for t in all_commands:
                    score = joint_ibm2.score(example_en, t)
                    if score >= best_score:
                        best_trans, best_score = t, score
                print "Correct:", example_ml, "Predicted:", best_trans, "Score:", best_score
                if best_trans == example_ml:
                    correct += 1
                total += 1

        print 'Reward Selection:', float(correct) / float(total)
        reward_selection.append(float(correct) / float(total))

    with open(out, 'w') as f:
        f.write("Fold Reward Function Accuracies: %s\n" % str(reward_selection))
        f.write("Average Reward Function Accuracy: %s\n" % str(sum(reward_selection) / len(reward_selection)))
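# Usage sketch for run_cross_val, assuming a pickle produced elsewhere in this
# repo that stores a (pc, commands) tuple -- both dicts keyed by 'L0'/'L1'/'L2'
# as consumed above. The paths below are hypothetical placeholders:
#
#   run_cross_val('data/parallel_corpus.pik', 'cross_val_results.txt')
#
# With FOLDS = 10, this trains ten joint IBM Model 2 instances and writes the
# per-fold and average reward-function accuracies to the output file.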
def train_model(level, test_level, data):
    with open(data, 'r') as f:
        pc, commands = pickle.load(f)

    # Train/test split: use the full corpora when the levels differ,
    # otherwise a 90-10 split of the single level's data
    if level != test_level:
        pc_train, pc_test = pc[level], pc[test_level]
    else:
        split = int(0.9 * len(pc[level]))
        pc_train, pc_test = pc[level][:split], pc[level][split:]
    shuffle(pc_train)
    shuffle(pc_test)

    all_commands = commands[level]
    joint_ibm2 = IBM2(pc_train, 15)

    correct, total = 0, 0
    for (example_en, example_ml) in pc_test:
        # L1 and L2 share a command space; for any other cross-level pair, map
        # the gold command into the training level's command space first
        if level != test_level and not (level in ['L1', 'L2'] and test_level in ['L1', 'L2']):
            example_ml = rf_map[" ".join(example_ml)].split(" ")

        # Score Translations
        best_trans, best_score = None, 0.0
        for t in all_commands:
            score = joint_ibm2.score(example_en, t)
            if score >= best_score:
                best_trans, best_score = t, score
        if best_trans == example_ml:
            correct += 1
        total += 1

    sys.stdout.write(str(float(correct) / float(total)) + ',')
    sys.stdout.flush()
    print
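# Usage sketch for train_model, assuming the same (pc, commands) pickle as
# run_cross_val above (the path is a hypothetical placeholder). Because each
# call emits a single comma-terminated accuracy, sweeping every train/test
# level pair prints a grid of cross-level accuracies:
#
#   for train_lvl in levels:
#       for test_lvl in levels:
#           train_model(train_lvl, test_lvl, 'data/parallel_corpus.pik')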
def data_curve(save_id, step=20, save_fig=False):
    """
    Plots accuracy against the number of training examples, across all levels.
    """
    data = {}
    for lvl in levels:
        nl_tokens, ml_tokens = get_tokens(nl_format % lvl), get_tokens(ml_format % lvl)
        ml_commands = get_tokens(commands_format % lvl)
        pc = zip(*(nl_tokens, ml_tokens))
        shuffle(pc)
        pc_train, pc_test = pc[:int(0.9 * len(pc))], pc[int(0.9 * len(pc)):]
        data[lvl] = (pc_train, pc_test, ml_commands)

    chunk_sizes, accuracies, level_accuracies = [], [], []
    level_confusion = {lvl: {lvl2: 0 for lvl2 in levels} for lvl in levels}

    for chunk_size in range(step, min(map(lambda z: len(z[0]), data.values())), step):
        # Train one per-level model on each chunk, plus a joint model on the pool
        l0_dataset = list(data["L0"][0][:chunk_size])
        l1_dataset = list(data["L1"][0][:chunk_size])
        l2_dataset = list(data["L2"][0][:chunk_size])
        joint_dataset = l0_dataset + l1_dataset + l2_dataset
        shuffle(joint_dataset)

        print 'Training IBM Model 2 on Chunk:', chunk_size
        models = {"L0": IBM2(l0_dataset, 15),
                  "L1": IBM2(l1_dataset, 15),
                  "L2": IBM2(l2_dataset, 15)}
        joint_ibm2 = IBM2(joint_dataset, 15)

        correct, total, lvl_correct = 0, 0, 0
        for lvl in data:
            pc_test = data[lvl][1]
            for example_en, example_ml in pc_test:
                # Pick Level: highest mean joint-model score over each level's commands
                level, level_max = "", 0.0
                for k in data:
                    commands, curr_sum = data[k][2], 0.0
                    for c in commands:
                        curr_sum += joint_ibm2.score(example_en, c)
                    lvl_signal = curr_sum / len(commands)
                    if lvl_signal >= level_max:
                        level, level_max = k, lvl_signal
                level_confusion[level][lvl] += 1

                # Only translate when the level was selected correctly
                if level == lvl:
                    ml_commands = data[lvl][2]
                    lvl_correct += 1

                    # Score Translations
                    best_trans, best_score = None, 0.0
                    for t in ml_commands:
                        score = models[lvl].score(example_en, t)
                        if score > best_score:
                            best_trans, best_score = t, score
                    print best_trans, best_score

                    # Update Counters
                    if best_trans == example_ml:
                        correct += 1
                total += 1

        print 'Chunk %s Level Selection Accuracy:' % str(chunk_size), float(lvl_correct) / float(total)
        print 'Chunk %s Test Accuracy:' % str(chunk_size), float(correct) / float(total)
        chunk_sizes.append(chunk_size)
        accuracies.append(float(correct) / float(total))
        level_accuracies.append(float(lvl_correct) / float(total))

    # Print Chunk Sizes, Accuracies
    print 'Chunk Sizes:', chunk_sizes
    print 'Accuracies:', accuracies
    print 'Level Selection Accuracies:', level_accuracies

    if save_fig:
        # Plot Data Curve
        plt.plot(chunk_sizes, accuracies)
        plt.title('Dual Model Data Curve')
        plt.xlabel('Number of Examples')
        plt.ylabel('Reward Function Accuracy')
        plt.savefig('./ibm_dual_data_{0}.png'.format(save_id))
        plt.clf()

        # Plot Level Selection Accuracy Curve
        plt.plot(chunk_sizes, level_accuracies)
        plt.title('Dual Model AMDP Level Selection Data Curve')
        plt.xlabel('Number of Examples')
        plt.ylabel('Level Selection Accuracy')
        plt.savefig('./ibm_dual_level_{0}.png'.format(save_id))

    print 'Level Confusion:', level_confusion
    return chunk_sizes, accuracies, level_accuracies, pandas.DataFrame(level_confusion)
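# Usage sketch for data_curve. save_id only tags the output filenames; the
# call below is a hypothetical example, not a committed experiment config:
#
#   sizes, accs, lvl_accs, confusion = data_curve('run1', step=20, save_fig=True)
#   print confusion
#
# The returned pandas DataFrame has the selected level as columns and the
# true level as rows (outer/inner keys of level_confusion, respectively).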
def get_dataframe(level, model='ibm2'):
    """
    Given the specific level to train on, take an arbitrary 90-10 split of the
    level data, then build the confusion matrix (represented as a DataFrame).

    :param level: Level to train on.
    :param model: Model type => one of 'ibm2', 'rnn', or 'nn'.
    :return: DataFrame representing the confusion matrix, and test accuracy.
    """
    tf.reset_default_graph()

    # Load Data
    nl_tokens, ml_tokens = get_tokens(nl_format % level), get_tokens(ml_format % level)
    ml_commands = get_tokens(commands_format % level)
    pc = zip(*(nl_tokens, ml_tokens))
    shuffle(pc)
    pc_train, pc_test = pc[:int(0.9 * len(pc))], pc[int(0.9 * len(pc)):]

    # Initialize Confusion Matrix
    confusion_matrix = {}
    for i in ml_commands:
        confusion_matrix[convert(i)] = {}
        for j in ml_commands:
            confusion_matrix[convert(i)][convert(j)] = 0

    # Train Model
    if model == 'rnn':
        print 'Training RNN Classifier'
        m = RNNClassifier(list(pc_train), ml_commands)
    elif model == 'nn':
        print 'Training NN Classifier'
        m = NNClassifier(list(pc_train), ml_commands)
    else:
        print 'Training IBM Model 2'
        m = IBM2(pc_train, 15)

    # Evaluate on Test Data
    correct, total = 0, 0
    for example_en, example_ml in pc_test:
        # Score Translations
        if model == 'ibm2':
            best_trans, best_score = None, 0.0
            for t in ml_commands:
                score = m.score(example_en, t)
                if score > best_score:
                    best_trans, best_score = t, score
        elif model in ['rnn', 'nn']:
            best_trans, best_score = m.score(example_en)

        # Update Counters
        total += 1
        if best_trans == example_ml:
            correct += 1

        # Update Confusion Matrix
        confusion_matrix[convert(example_ml)][convert(best_trans)] += 1

    # Return Matrix, Accuracy
    return pandas.DataFrame(confusion_matrix), float(correct) / float(total)
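# Usage sketch for get_dataframe -- e.g., building a per-level confusion matrix
# for each level (a hypothetical driver, not part of the original module):
#
#   for lvl in levels:
#       cm, acc = get_dataframe(lvl, model='ibm2')
#       print lvl, 'accuracy:', acc
#       cm.to_csv('confusion_%s.csv' % lvl)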
def train_model():
    data = {}
    for lvl in levels:
        nl_tokens, ml_tokens = get_tokens(nl_format % lvl), get_tokens(ml_format % lvl)
        ml_commands = get_tokens(commands_format % lvl)
        pc = zip(*(nl_tokens, ml_tokens))
        shuffle(pc)
        pc_train, pc_test = pc[:int(0.9 * len(pc))], pc[int(0.9 * len(pc)):]
        data[lvl] = (pc_train, pc_test, ml_commands)

    # Train one per-level model, plus a joint model on the pooled training data
    l0_pc, l1_pc, l2_pc = list(data["L0"][0]), list(data["L1"][0]), list(data["L2"][0])
    joint_dataset = l0_pc + l1_pc + l2_pc
    shuffle(joint_dataset)

    models = {"L0": IBM2(l0_pc, 15),
              "L1": IBM2(l1_pc, 15),
              "L2": IBM2(l2_pc, 15)}
    joint_ibm2 = IBM2(joint_dataset, 15)

    correct, total, lvl_correct = 0, 0, 0
    for lvl in data:
        pc_test = data[lvl][1]
        for example_en, example_ml in pc_test:
            # Pick Level: highest mean joint-model score over each level's commands
            level, level_max = "", 0.0
            for k in data:
                commands, curr_sum = data[k][2], 0.0
                for c in commands:
                    curr_sum += joint_ibm2.score(example_en, c)
                lvl_signal = curr_sum / len(commands)
                if lvl_signal >= level_max:
                    level, level_max = k, lvl_signal
            ml_commands = data[level][2]

            # Score Translations with the selected level's model
            best_trans, best_score = None, 0.0
            for t in ml_commands:
                score = models[level].score(example_en, t)
                if score > best_score:
                    best_trans, best_score = t, score
            print best_trans, best_score

            # Update Counters
            if level == lvl:
                lvl_correct += 1
            if best_trans == example_ml:
                correct += 1
            total += 1

    print 'Level Selection Accuracy:', float(lvl_correct) / float(total)
    print 'Test Accuracy:', float(correct) / float(total)

    with open('ibm_ckpt/models.pik', 'w') as f:
        pickle.dump((models, joint_ibm2), f)
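# The pickled checkpoint can later be reloaded to score new utterances without
# retraining. A minimal sketch, assuming IBM2 instances unpickle cleanly; the
# token lists here are hypothetical examples:
#
#   with open('ibm_ckpt/models.pik', 'r') as f:
#       models, joint_ibm2 = pickle.load(f)
#   print joint_ibm2.score(['go', 'to', 'the', 'red', 'room'], some_command_tokens)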
def loo_data_curve(nl_level, ml_level, save_id, model='ibm2', step=20, save_fig=True):
    """
    Generates a data curve (accuracy vs. number of training examples) for the
    given Natural Language - Machine Language pair, evaluated on a held-out
    10% split.

    :param nl_level: Natural Language Level => One of 'L0', 'L1', or 'L2'
    :param ml_level: Machine Language Level => One of 'L0', 'L1', or 'L2'
    """
    tf.reset_default_graph()

    nl_tokens, ml_tokens = get_tokens(nl_format % nl_level), get_tokens(ml_format % ml_level)
    ml_commands = get_tokens(commands_format % ml_level)
    pc = zip(*(nl_tokens, ml_tokens))
    shuffle(pc)
    pc_train, pc_test = pc[:int(0.9 * len(pc))], pc[int(0.9 * len(pc)):]

    # The neural models are built once and incrementally fit per chunk;
    # the IBM model is retrained from scratch on each chunk below
    if model == 'rnn':
        m = RNNClassifier(list(pc_train), ml_commands)
    elif model == 'nn':
        m = NNClassifier(list(pc_train), ml_commands)

    chunk_sizes, accuracies = [], []
    for chunk_size in range(step, len(pc_train), step):
        dataset = list(pc_train[:chunk_size])
        print 'Training Model on Chunk:', chunk_size
        if model == 'ibm2':
            m = IBM2(dataset, 15)
        elif model in ['rnn', 'nn']:
            m.fit(chunk_size)

        correct, total = 0, 0
        for example_en, example_ml in pc_test:
            # Score Translations
            if model == 'ibm2':
                best_trans, best_score = None, 0.0
                for t in ml_commands:
                    score = m.score(example_en, t)
                    if score > best_score:
                        best_trans, best_score = t, score
            elif model in ['rnn', 'nn']:
                best_trans, best_score = m.score(example_en)

            # Print Statistics
            print best_trans, best_score

            # Update Counters
            total += 1
            if best_trans == example_ml:
                correct += 1

        print 'Chunk %s Test Accuracy:' % str(chunk_size), float(correct) / float(total)
        chunk_sizes.append(chunk_size)
        accuracies.append(float(correct) / float(total))

    # Print Chunk Sizes, Accuracies
    print 'Chunk Sizes:', chunk_sizes
    print 'Accuracies:', accuracies

    if save_fig:
        # Plot Data Curve
        plt.plot(chunk_sizes, accuracies)
        plt.title('%s - %s Data Curve' % (nl_level, ml_level))
        plt.xlabel('Number of Examples')
        plt.ylabel('Accuracy')
        plt.savefig('./{0}_{1}_{2}_{3}.png'.format(model, nl_level, ml_level, save_id))
        plt.clf()

    return chunk_sizes, accuracies
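# Usage sketch for loo_data_curve, e.g. comparing the IBM Model 2 and RNN
# models on the same NL/ML pair (a hypothetical driver):
#
#   for mdl in ['ibm2', 'rnn']:
#       loo_data_curve('L0', 'L0', save_id='run1', model=mdl, step=20)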